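1、該網站的 PPT 連結全部都在頁面上。先用正規表示式手動提取所有連結,以 txt 檔案(UTF-8 編碼)儲存到指定位置(下面的程式讀取的是 E:\test\downinfo.txt)。格式如下:標題行以「【標題】」開頭,後接標題;下載地址行以「【下載地址】」開頭,後接下載連結;每個標題行在前,其對應的下載地址行在後。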
2、寫個java檔案處理一下,如下:
package platform;

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.Closeable;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.HashMap;
import java.util.Map;

import org.apache.http.HttpResponse;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.impl.client.DefaultHttpClient;

/**
 * Batch downloader for slide decks listed in a plain-text link file.
 *
 * <p>The link file is read line by line. A line starting with {@code 【標題】}
 * sets the current title; a line starting with {@code 【下載地址】} carries a URL
 * whose {@code Location} response header points at the real file. Each file is
 * saved as {@code <title>.pdf} in the download directory.
 */
public class TestQConDownload {

    /** Text file holding the extracted titles and links. */
    private static final String LINK_LIST_FILE = "E:\\test\\downinfo.txt";

    /** Directory (with trailing separator) that downloaded files are written to. */
    private static final String DOWNLOAD_DIR = "E:\\test\\download\\";

    /** Line prefix that introduces a title. */
    private static final String TITLE_PREFIX = "【標題】";

    /** Line prefix that introduces a download link. */
    private static final String URL_PREFIX = "【下載地址】";

    /**
     * Session cookie sent with every redirect lookup.
     *
     * <p>NOTE(review): this is a hard-coded login token. It should be moved out
     * of the source (environment variable / config file) and the token rotated.
     */
    private static final String SESSION_COOKIE =
            "dx_un=%E5%B9%B4%E8%BD%BB%E7%9A%84%E7%96%AF%E5%AD%90; dx_avatar=http%3A%2F%2F7xil0e.com1.z0.glb.clouddn.com%2Fuser_580d84f25ea61.png; dx_token=0c6b719ffff50f3746b64f058cb4e719";

    /**
     * Reads the link list and downloads every entry.
     *
     * <p>A failure on one link is reported and the remaining links are still
     * processed. A failure to open or read the link list itself ends the run.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        BufferedReader bufferedReader = null;
        // Used as the file name if a link appears before any title line.
        String title = "1";
        try {
            bufferedReader = readTxtFile(LINK_LIST_FILE);
            String lineTxt;
            while ((lineTxt = bufferedReader.readLine()) != null) {
                if (lineTxt.startsWith(TITLE_PREFIX)) {
                    title = sanitizeFileName(lineTxt.substring(TITLE_PREFIX.length()));
                    System.out.println(title);
                } else if (lineTxt.startsWith(URL_PREFIX)) {
                    downloadEntry(lineTxt.substring(URL_PREFIX.length()).trim(), title);
                }
            }
        } catch (IOException e) {
            // Covers a missing link list, an unsupported encoding and read errors.
            e.printStackTrace();
        } finally {
            closeResource(bufferedReader);
        }
    }

    /**
     * Resolves one link to its real location and downloads it, reporting
     * (rather than propagating) any failure so the caller can continue.
     */
    private static void downloadEntry(String pageUrl, String title) {
        try {
            String fileUrl = getRedirectLocation(pageUrl);
            System.out.println(fileUrl);
            downloadFile(fileUrl, DOWNLOAD_DIR + title + ".pdf");
        } catch (IOException e) {
            System.err.println("Skipping " + pageUrl + ": " + e.getMessage());
            e.printStackTrace();
        }
    }

    /**
     * Removes the characters Windows does not allow in a file name
     * ({@code \ / : * ? " < > |}) and trims surrounding whitespace.
     */
    private static String sanitizeFileName(String rawTitle) {
        return rawTitle.replaceAll("[\\\\/:*?\"<>|]", "").trim();
    }

    /**
     * Requests {@code url} and returns the value of the {@code Location}
     * header of the response, i.e. the address the server redirects to.
     *
     * @param url address to request
     * @return the value of the first {@code Location} response header
     * @throws ClientProtocolException on an HTTP protocol error
     * @throws IOException if the request fails or the response carries no
     *         {@code Location} header
     */
    public static String getRedirectLocation(String url) throws ClientProtocolException, IOException {
        // NOTE(review): presumably POST is used so that the client does not
        // follow the redirect itself and the Location header stays visible;
        // confirm against the HttpClient redirect strategy in use.
        HttpPost request = new HttpPost(url);
        request.setHeader("Cookie", SESSION_COOKIE);
        request.setHeader("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8");
        // The value is a language list; it was previously sent under the
        // wrong header name "Accept-Encoding".
        request.setHeader("Accept-Language", "zh-CN,zh;q=0.8");
        request.setHeader("Connection", "keep-alive");
        request.setHeader("Host", "ppt.geekbang.org");
        request.setHeader("Referer", "http://2016.qconshanghai.com/schedule");
        request.setHeader("Upgrade-Insecure-Requests", "1");
        request.setHeader("User-Agent", "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.87 Safari/537.36");

        DefaultHttpClient client = new DefaultHttpClient();
        try {
            HttpResponse response = client.execute(request);
            if (!response.containsHeader("Location")) {
                throw new IOException("No Location header in response from " + url
                        + " (status " + response.getStatusLine().getStatusCode() + ")");
            }
            return response.getFirstHeader("Location").getValue();
        } finally {
            // Release the connection and the per-call client even on failure.
            request.releaseConnection();
            client.getConnectionManager().shutdown();
        }
    }

    /**
     * Downloads a remote file and saves it locally.
     *
     * <p>This is best-effort: any failure is printed and the method returns
     * normally. A partially written file may be left behind on failure.
     *
     * @param remoteFilePath URL of the remote file
     * @param localFilePath  path of the local file to write
     */
    public static void downloadFile(String remoteFilePath, String localFilePath) {
        HttpURLConnection connection = null;
        BufferedInputStream in = null;
        BufferedOutputStream out = null;
        try {
            connection = (HttpURLConnection) new URL(remoteFilePath).openConnection();
            connection.connect();
            in = new BufferedInputStream(connection.getInputStream());
            out = new BufferedOutputStream(new FileOutputStream(new File(localFilePath)));
            byte[] buffer = new byte[2048];
            int read;
            while ((read = in.read(buffer)) != -1) {
                out.write(buffer, 0, read);
            }
            out.flush();
        } catch (Exception e) {
            // Deliberately broad: a bad URL or a non-HTTP connection must not
            // abort the whole batch.
            e.printStackTrace();
        } finally {
            // Each resource is closed independently and only if it was opened.
            closeResource(in);
            closeResource(out);
            if (connection != null) {
                connection.disconnect();
            }
        }
    }

    /**
     * Opens a text file for reading as UTF-8. The caller must close the
     * returned reader.
     *
     * @param filePath path of the file to open
     * @return a buffered reader over the file
     * @throws UnsupportedEncodingException if UTF-8 is not supported
     * @throws FileNotFoundException if the file cannot be opened
     */
    public static BufferedReader readTxtFile(String filePath) throws UnsupportedEncodingException, FileNotFoundException {
        String encoding = "UTF-8";
        FileInputStream fileStream = new FileInputStream(new File(filePath));
        return new BufferedReader(new InputStreamReader(fileStream, encoding));
    }

    /** Closes {@code resource} if it is non-null, printing any close failure. */
    private static void closeResource(Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}