使用java 爬蟲

夢真_qt發表於2020-10-05

 Caused by: java.lang.RuntimeException: javax.net.ssl.SSLHandshakeException: sun.security.validator.ValidatorException: PKIX path building failed: sun.security.provider.certpath.SunCertPathBuilderException: unable to find valid certification path to requested target

此錯誤是由 SSL 憑證問題導致:Java 在信任庫中找不到目標網站的有效憑證鏈。

解決方式:使用 HttpURLConnection 發送 HTTPS 請求時,設定自訂的 TrustManager 並略過主機名稱驗證,讓連線信任該網站的 SSL 憑證。

參考連結:https://blog.csdn.net/zz153417230/article/details/80271155

 

 

package novel.spider.impl;

import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.security.SecureRandom;
import java.util.ArrayList;
import java.util.List;

import javax.net.ssl.HostnameVerifier;
import javax.net.ssl.HttpsURLConnection;
import javax.net.ssl.SSLContext;
import javax.net.ssl.SSLSession;
import javax.net.ssl.TrustManager;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import novel.spider.entitys.Chapter;
import novel.spider.interfaces.IChapterSpider;
import novel.spider.junit.MyX509TrustManager;

/**
 * Chapter spider that downloads a chapter-index page over HTTP(S) and turns
 * every matching link on it into a {@link Chapter}.
 */
public class AbstractChapterSpider implements IChapterSpider {
	/**
	 * Downloads the page at {@code url} and returns its body decoded as UTF-8.
	 *
	 * <p>Before connecting, this method installs a custom trust manager and a
	 * hostname verifier that accepts every host as the JVM-wide defaults for
	 * {@link HttpsURLConnection}.
	 *
	 * <p>NOTE(security): these defaults affect every HTTPS connection opened
	 * later in the same JVM, not only this request. They are kept global here
	 * because other code may rely on them; consider scoping them to the single
	 * connection instead.
	 *
	 * @param url absolute URL of the page to download
	 * @return the response body with all lines joined together
	 * @throws Exception if the SSL context cannot be created or the request fails
	 */
	protected String crawl(String url) throws Exception {
		// First argument is the protocol, second is the provider (may be omitted).
		SSLContext sslcontext = SSLContext.getInstance("SSL", "SunJSSE");
		// Presumably MyX509TrustManager accepts any certificate chain; its
		// source is not visible here -- TODO confirm.
		TrustManager[] tm = {new MyX509TrustManager()};
		sslcontext.init(null, tm, new SecureRandom());
		HostnameVerifier ignoreHostnameVerifier = new HostnameVerifier() {
			@Override
			public boolean verify(String s, SSLSession sslsession) {
				System.out.println("WARNING: Hostname is not matched for cert.");
				return true;
			}
		};
		HttpsURLConnection.setDefaultHostnameVerifier(ignoreHostnameVerifier);
		HttpsURLConnection.setDefaultSSLSocketFactory(sslcontext.getSocketFactory());

		HttpURLConnection conn = (HttpURLConnection) new URL(url).openConnection();
		StringBuilder resp = new StringBuilder();
		// try-with-resources closes the reader and the underlying stream even
		// when reading fails, so the connection is not leaked.
		try (InputStream in = conn.getInputStream();
				BufferedReader reader = new BufferedReader(new InputStreamReader(in, "UTF-8"))) {
			String line;
			// NOTE(review): readLine() strips line terminators, so lines are
			// joined with no separator. Kept as-is for compatibility, but text
			// or markup split across a line break gets fused together.
			while ((line = reader.readLine()) != null) {
				resp.append(line);
			}
		}
		return resp.toString();
	}

	/**
	 * Downloads the page at {@code url} and builds one {@link Chapter} per
	 * anchor matched by the selector {@code "div li a"}.
	 *
	 * @param url absolute URL of the chapter-index page
	 * @return the chapters in document order; empty if no anchor matches
	 * @throws RuntimeException wrapping any failure while downloading or parsing
	 */
	@Override
	public List<Chapter> getsChapter(String url) {
		try {
			String result = crawl(url);
			Document doc = Jsoup.parse(result);
			Elements as = doc.select("div li a");
			List<Chapter> chapters = new ArrayList<>();
			for (Element a : as) {
				Chapter chapter = new Chapter();
				chapter.setTitle(a.text());
				// NOTE(review): the host prefix is hard-coded and does not
				// depend on the url argument -- confirm this is right for
				// every site being crawled.
				chapter.setUrl("http://www.bxwx8.org" + a.attr("href"));
				chapters.add(chapter);
			}
			return chapters;
		} catch (Exception e) {
			throw new RuntimeException(e);
		}
	}

}
package novel.spider.junit;

import java.util.List;

import org.junit.Test;

import novel.spider.entitys.Chapter;
import novel.spider.impl.DefaultChapterSpider;
import novel.spider.interfaces.IChapterSpider;

/**
 * Manual smoke test for the chapter spider: fetches a live chapter-index page
 * and prints every chapter that was extracted.
 */
public class Testcase {

	/**
	 * Crawls a fixed index URL with {@link DefaultChapterSpider} and prints
	 * each resulting chapter to standard output.
	 *
	 * @throws Exception if the test body throws
	 */
	@Test
	public void test1() throws Exception {
		IChapterSpider chapterSpider = new DefaultChapterSpider();
		List<Chapter> found = chapterSpider.getsChapter("https://www.266ks.com/0_5/");
		// Print in list order, one chapter per line.
		for (int i = 0; i < found.size(); i++) {
			System.out.println(found.get(i));
		}
	}
}

 

相關文章