C# 簡單實現提取網頁內容

iDotNetSpace發表於2009-11-30

下面的程式碼是從一個網路爬蟲程式中提取出來的，覺得有用，記錄下來。

程式碼
using System;
using System.Collections;
using System.Collections.Generic;
using System.Reflection;
using System.IO;
using System.Net;
using System.Text;

namespace MyCsStudy
{
    class Program
    {
        ///


        /// 簡單網路爬蟲程式
        ///

        ///
        /// 編碼 可以為空
        ///
        public static string Fetch(string url, string charset)
        {
            Encoding encoding;
            HttpWebRequest request;
            HttpWebResponse response = null;
            Stream resStream = null;
            StreamReader sr = null;
            string result = string.Empty;
            try
            {
                request = (HttpWebRequest)HttpWebRequest.Create(url);
                response = (HttpWebResponse)request.GetResponse();
                resStream = response.GetResponseStream();
                if (!string.IsNullOrEmpty(charset))
                {
                    encoding = Encoding.GetEncoding(charset);
                }
                else if (!string.IsNullOrEmpty(response.CharacterSet))
                {
                    encoding = Encoding.GetEncoding(response.CharacterSet);
                }
                else
                {
                    encoding = Encoding.Default;
                }
                sr = new StreamReader(resStream, encoding);
                result = sr.ReadToEnd();
            }
            //catch (Exception ex)         
            //{            
            //    throw ex;
            //}           
            finally
            {
                if (sr != null)
                {
                    sr.Close();
                }
                if (resStream != null)
                {
                    resStream.Close();
                }
                if (response != null)
                {
                    response.Close();
                }
            }
            return result;
        }


        static void Main(string[] args)
        {
            string webSite=@"http://www.google.cn"; //這裡url必須帶上協議
            string strHTML = Fetch(webSite,null);
           
            Console.Write(strHTML);

            Console.ReadLine();
        }
    }
}

來自 “ ITPUB部落格 ” ,連結:http://blog.itpub.net/12639172/viewspace-621002/,如需轉載,請註明出處,否則將追究法律責任。

相關文章