import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Small scraping helper: downloads the text behind a URL and extracts
 * substrings from it with a regular expression.
 */
public class WebSpider1 {

    /**
     * Reads the resource at {@code urlStr} and returns its text decoded with the
     * charset named by {@code s}.
     *
     * <p>Lines are concatenated without their line terminators, so the result
     * is a single line of text.
     *
     * <p>This method is best-effort: any {@link IOException} (including a
     * malformed URL or an unsupported charset name) is reported via
     * {@code printStackTrace()} and whatever was read up to that point is
     * returned, which may be the empty string.
     *
     * @param urlStr the URL to read from
     * @param s      the name of the charset used to decode the response bytes
     * @return the decoded content with line terminators removed; never {@code null}
     */
    public static String getContent(String urlStr, String s) {
        StringBuilder sb = new StringBuilder();
        try {
            URL url = new URL(urlStr);
            // The raw stream is declared as its own resource so that it is closed
            // even when constructing the reader fails (e.g. unsupported charset).
            try (InputStream in = url.openStream();
                 BufferedReader bf = new BufferedReader(new InputStreamReader(in, s))) {
                String line;
                while ((line = bf.readLine()) != null) {
                    sb.append(line);
                }
            }
        } catch (IOException e) {
            // MalformedURLException and UnsupportedEncodingException are both
            // IOException subclasses, so this single handler covers all three
            // cases the method has always reported.
            e.printStackTrace();
        }
        return sb.toString();
    }

    /**
     * Collects one string for every match of {@code regexStr} in {@code des}.
     *
     * <p>If the pattern declares at least one capturing group, the text captured
     * by group 1 is collected (this entry is {@code null} for a match in which
     * group 1 did not participate). If the pattern has no capturing group, the
     * whole matched text is collected instead.
     *
     * @param des      the text to search
     * @param regexStr the regular expression to apply
     * @return the collected strings in match order; empty if nothing matched
     * @throws java.util.regex.PatternSyntaxException if {@code regexStr} is not a valid pattern
     */
    public static List<String> getStr(String des, String regexStr) {
        Matcher m = Pattern.compile(regexStr).matcher(des);
        // group(1) would throw IndexOutOfBoundsException for a pattern without
        // groups, so fall back to the whole match in that case.
        int group = m.groupCount() >= 1 ? 1 : 0;
        List<String> list = new ArrayList<>();
        while (m.find()) {
            list.add(m.group(group));
        }
        return list;
    }

    /**
     * Fetches http://www.163.com decoded as GBK and prints every {@code href}
     * attribute value that consists only of word characters, whitespace,
     * {@code .}, {@code /} and {@code :}.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String des = getContent("http://www.163.com", "gbk");
        List<String> str = getStr(des, "href=\"([\\w\\s./:]*?)\"");
        for (String temp : str) {
            System.out.println(temp);
        }
    }
}