繁簡體(GB<=>Big5)字串轉化的JAVA方式實現

pingyuan發表於2007-01-25
[繁簡體(GB<=>Big5)字串轉化的JAVA方式實現](http://pingyuan.itpub.net/entry/19)

本文提供一個java實現中文字元繁簡體互換的zip包以及主要的原始碼實現說明。
package zeal.util;
2
3import java.io.*;
4
/**
 * Converts text between the GB2312/GBK and Big5 encodings with two lookup tables.
 * <p>
 * Two table files are needed; {@link #getInstance()} loads them from
 * {@code /zeal/util/gb-big5.table} and {@code /zeal/util/big5-gb.table}.
 * Incorrect mappings in either table can be corrected with
 * {@link #resetBig5Char(String, String)} and {@link #resetGbChar(String, String)}.
 * <p>
 * Title: GB&lt;=&gt;Big5
 * <p>
 * Description: Deal with the conversion between gb2312 and big5 charset Strings.
 * <p>
 * Copyright: Copyright (c) 2004
 * <p>
 * Company: zeali.net
 *
 * @author Zeal Li
 * @version 1.0
 *
 * @see zeal.util.StreamConverter
 */
public class GB2Big5{
    /** Lead byte (unsigned) of the double-byte codes that bypass the tables. */
    private static final int SPECIAL_LEAD = 161;

    /** Value subtracted from the unsigned lead byte by the table offset formula. */
    private static final int LEAD_BASE = 160;

    /** Number of table bytes the offset formula advances per lead-byte value. */
    private static final int BYTES_PER_LEAD = 510;

    /** Byte ('-') emitted when a table offset falls outside the loaded table. */
    private static final byte UNMAPPED = 45;

    private static GB2Big5 pInstance = null;
    private String s_big5TableFile = null;
    private String s_gbTableFile = null;
    private byte[] b_big5Table = null;
    private byte[] b_gbTable = null;

    /**
     * Loads both tables. Both loads are attempted before either result is checked.
     *
     * @param sgbTableFile   resource name of the table used by {@link #gb2big5(String)}
     * @param sbig5TableFile resource name of the table used by {@link #big52gb(String)}
     * @throws NullPointerException if either table could not be loaded
     */
    private GB2Big5(String sgbTableFile,String sbig5TableFile)
        throws NullPointerException{
        s_big5TableFile = sbig5TableFile;
        s_gbTableFile = sgbTableFile;
        b_gbTable = getBytesFromFile(sgbTableFile);
        b_big5Table = getBytesFromFile(sbig5TableFile);
        if(null == b_gbTable){
            throw new NullPointerException("No gb table can be load");
        }
        if(null == b_big5Table){
            throw new NullPointerException("No big5 table can be load");
        }
    }

    /**
     * Returns the shared instance, creating it from the default table resources
     * on first use.
     *
     * @return the shared instance, or {@code null} if the tables could not be loaded
     */
    public static synchronized GB2Big5 getInstance(){
        return getInstance("/zeal/util/gb-big5.table",
                           "/zeal/util/big5-gb.table");
    }

    /**
     * Returns the shared instance, creating it from the given table resources when
     * none exists yet. Once an instance exists the arguments are ignored. A failed
     * creation is reported on {@code System.err} and retried on the next call.
     *
     * @param sgbTableFile   resource name of the GB-to-Big5 table
     * @param sbig5TableFile resource name of the Big5-to-GB table
     * @return the shared instance, or {@code null} if creation failed
     */
    public static synchronized GB2Big5 getInstance(String sgbTableFile,
                                                   String sbig5TableFile){
        if(null == pInstance){
            try{
                pInstance = new GB2Big5(sgbTableFile,sbig5TableFile);
            }
            catch(Exception e){
                System.err.println(e.toString());
                pInstance = null;
            }
        }

        return pInstance;
    }

    /**
     * Replaces the Big5 code stored for {@code gbChar} and rewrites the GB table file.
     * Intended for correcting a mapping that was found to be wrong.
     * <p>
     * The table is written with a {@code FileOutputStream} opened on the same string
     * that was used as a class resource name when loading, so that string must also
     * be a usable file-system path for the save to succeed.
     *
     * @param gbChar   character(s) whose mapping is replaced
     * @param big5Char replacement character(s), matched to {@code gbChar} byte by byte
     * @throws Exception if an encoding is unsupported, an index falls outside the
     *                   table or the replacement bytes, or the file cannot be written
     */
    protected synchronized void resetBig5Char(String gbChar,String big5Char)
        throws Exception{
        byte[] keys = reencode(gbChar,"GBK");
        byte[] replacement = reencode(big5Char,"BIG5");
        patchTable(b_gbTable,keys,replacement);
        writeTable(s_gbTableFile,b_gbTable);
    }

    /**
     * Replaces the GB code stored for {@code big5Char} and rewrites the Big5 table file.
     * Intended for correcting a mapping that was found to be wrong.
     * <p>
     * The same file-path caveat as for {@link #resetBig5Char(String, String)} applies.
     *
     * @param big5Char character(s) whose mapping is replaced
     * @param gbChar   replacement character(s), matched to {@code big5Char} byte by byte
     * @throws Exception if an encoding is unsupported, an index falls outside the
     *                   table or the replacement bytes, or the file cannot be written
     */
    protected synchronized void resetGbChar(String big5Char,String gbChar)
        throws Exception{
        byte[] replacement = reencode(gbChar,"GBK");
        byte[] keys = reencode(big5Char,"BIG5");
        patchTable(b_big5Table,keys,replacement);
        writeTable(s_big5TableFile,b_big5Table);
    }

    /**
     * Converts a GB2312-encoded string into Big5 bytes.
     * <p>
     * NOTE(review): the pair 0xA1 0x40 is special-cased here and 0xA1 0xA1 in
     * {@link #big52gb(String)}; each looks like the full-width space of the
     * <em>other</em> charset. Confirm against the table files before changing.
     *
     * @param inStr text to convert; {@code null} or empty yields an empty array
     * @return the converted bytes
     * @throws Exception if the GBK charset is not supported
     */
    public byte[] gb2big5(String inStr) throws Exception{
        if(null == inStr || inStr.length() <= 0){
            return "".getBytes();
        }

        byte[] text = reencode(inStr,"GBK");
        // Input pair (161, 64) maps straight to (161, 161) without a table lookup.
        convertPairs(text,b_gbTable,64,(byte) 161,(byte) 161);
        return text;
    }

    /**
     * Converts a Big5-encoded string into a GB2312 string.
     *
     * @param inStr text to convert; {@code null} or empty yields an empty string
     * @return the converted bytes decoded with the platform default charset
     * @throws Exception if the BIG5 charset is not supported
     */
    public String big52gb(String inStr) throws Exception{
        if(null == inStr || inStr.length() <= 0){
            return "";
        }

        byte[] text = reencode(inStr,"BIG5");
        // Input pair (161, 161) maps straight to (161, 64) without a table lookup.
        convertPairs(text,b_big5Table,161,(byte) 161,(byte) 64);
        return new String(text);
    }

    /**
     * Takes the platform-default bytes of {@code value}, decodes them with
     * {@code charsetName} and encodes the result with {@code charsetName} again.
     */
    private static byte[] reencode(String value,String charsetName)
        throws UnsupportedEncodingException{
        return new String(value.getBytes(),charsetName).getBytes(charsetName);
    }

    /** Computes the table offset of the two bytes stored for an unsigned byte pair. */
    private static int tableOffset(int lead,int trail){
        return (lead - LEAD_BASE) * BYTES_PER_LEAD + (trail - 1) * 2;
    }

    /** Returns {@code table[index]}, or {@link #UNMAPPED} when the index is out of range. */
    private static byte lookup(byte[] table,int index){
        if(index < 0 || index >= table.length){
            return UNMAPPED;
        }
        return table[index];
    }

    /**
     * Rewrites, in place, every byte pair of {@code text} whose first byte has the
     * high bit set. The pair ({@link #SPECIAL_LEAD}, {@code specialTrail}) becomes
     * ({@code specialOut0}, {@code specialOut1}); every other pair is looked up in
     * {@code table}. A trailing single byte is left untouched.
     */
    private static void convertPairs(byte[] text,byte[] table,int specialTrail,
                                     byte specialOut0,byte specialOut1){
        int last = text.length - 1;
        for(int i = 0; i < last; i++){
            if(text[i] >= 0){
                continue; // single-byte character, copied through unchanged
            }
            int lead = text[i] & 0xFF;
            int trail = text[i + 1] & 0xFF;
            byte out0;
            byte out1;
            if(lead == SPECIAL_LEAD && trail == specialTrail){
                out0 = specialOut0;
                out1 = specialOut1;
            }
            else{
                int p = tableOffset(lead,trail);
                out0 = lookup(table,p);
                out1 = lookup(table,p + 1);
            }
            text[i] = out0;
            text[i + 1] = out1;
            i++; // skip the trail byte that was just consumed
        }
    }

    /**
     * For every double-byte code in {@code keys} except (161, 64), stores the two
     * bytes found at the same position in {@code values} into {@code table}.
     */
    private static void patchTable(byte[] table,byte[] keys,byte[] values){
        int last = keys.length - 1;
        for(int i = 0; i < last; i++){
            if(keys[i] >= 0){
                continue;
            }
            int lead = keys[i] & 0xFF;
            int trail = keys[i + 1] & 0xFF;
            if(!(lead == SPECIAL_LEAD && trail == 64)){
                int p = tableOffset(lead,trail);
                table[p] = values[i];
                table[p + 1] = values[i + 1];
            }
            i++; // skip the trail byte that was just consumed
        }
    }

    /** Writes the whole table to the named file, closing the stream even on failure. */
    private static void writeTable(String fileName,byte[] table)
        throws IOException{
        BufferedOutputStream out =
            new BufferedOutputStream(new FileOutputStream(fileName));
        try{
            out.write(table,0,table.length);
        }
        finally{
            out.close();
        }
    }

    /**
     * Reads a class resource fully into a byte array.
     *
     * @param inFileName resource name resolved against this class
     * @return the resource content, or {@code null} if it is missing or unreadable
     */
    private static byte[] getBytesFromFile(String inFileName){
        InputStream in = null;
        try{
            in = GB2Big5.class.getResourceAsStream(inFileName);
            if(null == in){
                System.err.println("Table resource not found: " + inFileName);
                return null;
            }
            ByteArrayOutputStream content = new ByteArrayOutputStream();
            byte[] chunk = new byte[4096];
            int count;
            while((count = in.read(chunk)) != -1){
                content.write(chunk,0,count);
            }
            return content.toByteArray();
        }
        catch(Exception e){
            e.printStackTrace();
            return null;
        }
        finally{
            if(null != in){
                try{
                    in.close();
                }
                catch(IOException ignored){
                    // The content has already been read; a failed close changes nothing.
                }
            }
        }
    }

    /**
     * Command-line entry point. {@code -gb} (the default) runs
     * {@link #big52gb(String)} on the input; {@code -big5} runs
     * {@link #gb2big5(String)}. The last non-flag argument is the input string.
     *
     * @param args flag and input string
     * @throws Exception if a conversion fails
     */
    public static void main(String[] args) throws Exception{
        if(args.length < 2){
            System.out.println(
                "Usage: zeal.util.GB2Big5 [-gb | -big5] inputstring");
            System.exit(1);
            return;
        }

        boolean bIsGB = true;
        String inStr = "";
        for(int i = 0; i < args.length; i++){
            if(args[i].equalsIgnoreCase("-gb")){
                bIsGB = true;
            }
            else if(args[i].equalsIgnoreCase("-big5")){
                bIsGB = false;
            }
            else{
                inStr = args[i];
            }
        }

        GB2Big5 pTmp = GB2Big5.getInstance();
        if(null == pTmp){
            // getInstance() has already printed the cause.
            System.err.println("Conversion tables could not be loaded");
            System.exit(1);
            return;
        }

        String outStr = "";
        if(bIsGB){
            outStr = pTmp.big52gb(inStr);
        }
        else{
            outStr = new String(pTmp.gb2big5(inStr),"BIG5");
        }

        System.out.println("String [" + inStr +
                           "] converted into:\n[" + outStr + "]");
    }
}
直接呼叫GB2Big5只適用於對於少量字元的轉化,當需要對整個jsp頁面根據使用者需要進行編碼轉化
的時候,就需要使用到taglib的功能。
具體配置使用步驟如下:


1.在WEB-INF/目錄下增加GB2Big5Wrapper.tld檔案,內容如下:
<?xml version="1.0" encoding="ISO-8859-1"?>
<!DOCTYPE taglib
 PUBLIC "-//Sun Microsystems, Inc.//DTD JSP Tag Library 1.2//EN"
 "http://java.sun.com/dtd/web-jsptaglibrary_1_2.dtd">

<taglib>
 <tlib-version>1.0</tlib-version>
 <jsp-version>1.2</jsp-version>
 <short-name>zealLi</short-name>
 <tag>
  <name>GB2Big5Wrapper</name>
  <tag-class>zeal.util.GB2Big5Wrapper</tag-class>
  <attribute>
   <name>isbig5</name>
   <rtexprvalue>true</rtexprvalue>
   <type>boolean</type>
  </attribute>
 </tag>
</taglib>

2.在需要進行轉化的JSP頁面裡面加上:


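<%@ taglib uri="/WEB-INF/GB2Big5Wrapper.tld" prefix="zealLi"%>
<zealLi:GB2Big5Wrapper isbig5="true">
任何你需要轉化的東西
</zealLi:GB2Big5Wrapper>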

比如test.jsp原始碼如下 => 
<%@ page
contentType="text/html; charset=GBK"

import="javax.servlet.http.HttpSession"
import="java.util.*"
import="com.zealLi.*"
%>
<%
String encode = request.getParameter("encode");

if(null == encode || encode.length() <= 0){
 encode = "BIG5";
}
boolean isBig5 = false;
String charset = "GB2312";

if(encode.equalsIgnoreCase("BIG5")){
 isBig5 = true;
 charset = "BIG5";
}
String sInfo = "中文字型繁簡體轉化的測試。";
%>
<%@ taglib uri="/WEB-INF/GB2Big5Wrapper.tld" prefix="zealLi"%>
<zealLi:GB2Big5Wrapper isbig5="<%=isBig5%>">
<html>
 <head>
 <title>Jsp測試頁面</title>
 <meta http-equiv="Content-Type" content="text/html; charset=<%=charset%>">
 </head>
 <body>
 <%
 Calendar now = Calendar.getInstance();
 out.println(now.get(Calendar.YEAR) + "." +
 (now.get(Calendar.MONTH)+1) + "." +
 now.get(Calendar.DAY_OF_MONTH) + "<p>");
 %>
 <p>
 <%=sInfo%>
 </body>
</html>
</zealLi:GB2Big5Wrapper>

來自 “ ITPUB部落格 ” ,連結:http://blog.itpub.net/7199667/viewspace-893541/,如需轉載,請註明出處,否則將追究法律責任。

相關文章