JavaScript 中有三種編碼方法:escape、encodeURI、encodeURIComponent。位址列中那些 %XX,就是漢字先以 UTF-8 轉成位元組、再經 encodeURI 做百分號編碼後的結果;一個位元組對應一個 %XX(例如一個常用漢字在 UTF-8 下佔三個位元組,就會變成三組 %XX)。
轉換方式如下:
/**
 * Handles a GET request by pulling the Chinese characters out of the query string and
 * echoing them back to the client as GBK-encoded HTML text.
 *
 * <p>Steps performed:
 * <ol>
 *   <li>Read the raw (still percent-encoded) query string.</li>
 *   <li>Percent-decode it, interpreting the bytes as UTF-8.</li>
 *   <li>Concatenate every run of characters in the range U+4E00..U+9FA5.</li>
 *   <li>Percent-encode that text using GBK bytes, then decode it again with GBK as a
 *       round-trip check.</li>
 *   <li>Write the round-tripped text to the response, declared as {@code text/html} in GBK.</li>
 * </ol>
 * Each intermediate value is printed to standard output for inspection.
 *
 * @param request  the incoming request; its query string may be absent
 * @param response the response the extracted text is written to
 * @throws ServletException declared for the servlet contract; not thrown directly here
 * @throws IOException      if obtaining the response writer fails
 */
public void doGet(HttpServletRequest request, HttpServletResponse response)
        throws ServletException, IOException {
    String rawQuery = request.getQueryString();
    System.out.println(rawQuery);

    // getQueryString() returns null when the URL carries no query part; decoding null
    // would throw a NullPointerException, so treat a missing query as empty text.
    String line = (rawQuery == null) ? "" : rawQuery;

    // Turn the %XX escapes back into bytes and interpret those bytes as UTF-8.
    String isDecodeUrl = URLDecoder.decode(line, "UTF-8");
    System.out.println(isDecodeUrl);

    // U+4E00..U+9FA5 is the range used here to recognise Chinese characters.
    String reg = "[\\u4e00-\\u9fa5]+";
    Pattern p = Pattern.compile(reg);
    Matcher m = p.matcher(isDecodeUrl);

    // Collect every matching run, in order of appearance, into one string.
    StringBuilder chineseWord = new StringBuilder();
    while (m.find()) {
        chineseWord.append(m.group());
    }
    String chineseText = chineseWord.toString();
    System.out.println(chineseText);

    // Convert the characters to GBK bytes and percent-encode them.
    String cnm = URLEncoder.encode(chineseText, "GBK");
    System.out.println(cnm);

    // Round-trip check: decoding with GBK should give back the same characters.
    isDecodeUrl = URLDecoder.decode(cnm, "GBK");
    System.out.println(isDecodeUrl);

    // Declare the body as GBK before getWriter() so the writer uses that charset.
    response.setCharacterEncoding("GBK");
    response.setHeader("Content-type", "text/html;charset=GBK");
    PrintWriter out = response.getWriter();
    out.println(isDecodeUrl);
}