urllib庫

weixin_34162695發表於2018-06-02

1 urlopen()

把 Python 官網的首頁抓取下來:

# urlopen(): download the python.org front page and print it as text.
import urllib.request

page = urllib.request.urlopen('https://www.python.org')
raw_html = page.read()  # the body is returned as bytes
print(raw_html.decode('utf-8'))

2 檢視返回的型別

# Inspect the type of the object returned by urlopen().
import urllib.request

result = urllib.request.urlopen('https://www.python.org')
result_type = type(result)
print(result_type)

# Output: <class 'http.client.HTTPResponse'>

這是一個HTTPResponse型別的物件,包含的方法有:
read()
readinto()
getheader(name)
getheaders()
fileno() 等等。
包含的屬性有:
msg
version
status
reason
debuglevel
closed
3 再來看一個例子

# Another example: print the status code, every header, and one named header.
import urllib.request

reply = urllib.request.urlopen('https://www.python.org')
status_code = reply.status
all_headers = reply.getheaders()
server_header = reply.getheader('Server')
for item in (status_code, all_headers, server_header):
    print(item)

# Output:
# 200
# (the header list is too long, omitted here)
# nginx

4 urlopen() 函式的API

#urlopen() 函式的API
urllib.request.urlopen(url, data=None, [timeout, ]*, 
    cafile=None, capath=None, cadefault=False, context=None)

5 data引數

import urllib.parse
import urllib.request

# URL-encode the form field, then convert the resulting string to bytes,
# which is the form passed to urlopen() through its data argument.
form_text = urllib.parse.urlencode({'word':'hello'})
payload = form_text.encode('utf8')
reply = urllib.request.urlopen('http://httpbin.org/post', data=payload)
body = reply.read()
print(body)

# Output not shown here

6 timeout引數

# The timeout argument here means: if the server still has not responded
# after 1 second, a URLError exception is raised.
import urllib.request

reply = urllib.request.urlopen('http://httpbin.org/get', timeout=1)
body = reply.read()
print(body)

# Output not shown here
import socket
import urllib.error
import urllib.request

# Request with a very short (0.1 s) timeout and report a timeout explicitly.
try:
    # The with-statement closes the response (and its socket) when done.
    with urllib.request.urlopen('http://httpbin.org/get', timeout=0.1) as response:
        print(response.read())
except urllib.error.URLError as e:
    # A timeout while connecting is wrapped in URLError; the underlying
    # exception is available as e.reason.
    if isinstance(e.reason, socket.timeout):
        print('TIME OUT')
    else:
        # Any other failure (DNS error, HTTP error, ...) must not be
        # silently swallowed.
        raise
except socket.timeout:
    # A timeout that happens while reading the body is raised directly,
    # not wrapped in URLError.
    print('TIME OUT')

7 Request

class urllib.request.Request(url, data=None, headers={}, 
    origin_req_host=None, unverifiable=False, method=None)

看一個例子

傳入多個引數構建請求

# Build a request from several arguments: URL, form data, headers and method.
from urllib import request, parse

url = 'http://httpbin.org/post'
headers = {
    # "MSIE" is the Internet Explorer token; the original "MSIC" was a typo.
    'User-Agent': 'Mozilla/4.0(compatible;MSIE 5.5;Windows NT)',
    'Host': 'httpbin.org',
}
# Named form_fields rather than dict so the builtin dict type is not shadowed.
form_fields = {
    'name': 'Germey',
}
# The request body must be bytes, so URL-encode the fields and convert them.
data = bytes(parse.urlencode(form_fields), encoding='utf8')
req = request.Request(url=url, data=data, headers=headers, method='POST')
# The with-statement closes the response (and its socket) when done.
with request.urlopen(req) as response:
    print(response.read().decode('utf-8'))

相關文章