The main file is as follows:
#coding=utf-8
import requests
import urllib
import urllib2
import cookielib
import WeiboEncode
import WeiboSearch
import time
import re
import random
import httplib

class WeiboLogin:
    def __init__(self, user, pwd, enableProxy=False):
        "Initialize WeiboLogin. enableProxy says whether to go through a proxy server; off by default."
        print "Initializing WeiboLogin..."
        self.userName = user
        self.passWord = pwd
        self.enableProxy = enableProxy
        self.serverUrl = "http://login.sina.com.cn/sso/prelogin.php?entry=weibo&callback=sinaSSOController.preloginCallBack&su=&rsakt=mod&client=ssologin.js(v1.4.11)&_=1379834957683"
        self.loginUrl = "http://login.sina.com.cn/sso/login.php?client=ssologin.js(v1.4.11)"
        # The User-Agent header identifies the client browser: hardware platform,
        # system software, application software and personal preferences.
        self.postHeader = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; rv:24.0) Gecko/20100101 Firefox/24.0'}

    def Login(self):
        "Log in to Weibo."
        self.EnableCookie(self.enableProxy)  # set up cookies and, if wanted, the proxy
        serverTime, nonce, pubkey, rsakv = self.GetServerTime()  # step 1 of the login
        postData = WeiboEncode.PostEncode(self.userName, self.passWord, serverTime, nonce, pubkey, rsakv)  # encode user name and password
        print "Post data length: ", len(postData)
        req = urllib2.Request(self.loginUrl, postData, self.postHeader)  # build the request
        print "Posting request..."
        result = urllib2.urlopen(req)  # send the request
        text = result.read()
        try:
            loginUrl = WeiboSearch.sRedirectData(text)  # parse the redirect target (the page jumped to after login)
            urllib2.urlopen(loginUrl)
        except:
            print 'Login error!'
            return False
        print 'Login success!'
        return True

    def EnableCookie(self, enableProxy):
        "Enable cookie & proxy (if needed)."
        cookiejar = cookielib.LWPCookieJar()  # create the cookie jar
        cookie_support = urllib2.HTTPCookieProcessor(cookiejar)
        # HTTPCookieProcessor instances have one attribute:
        # HTTPCookieProcessor.cookiejar (the cookielib.CookieJar in which cookies are stored).
        if enableProxy:
            proxy_support = urllib2.ProxyHandler({'http': '59.59.100.123:8118'})  # go through a proxy
            opener = urllib2.build_opener(proxy_support, cookie_support, urllib2.HTTPHandler)
            # build_opener returns an OpenerDirector instance; OpenerDirector
            # opens URLs via BaseHandlers chained together.
            print "Proxy enabled"
        else:
            opener = urllib2.build_opener(cookie_support, urllib2.HTTPHandler)
        urllib2.install_opener(opener)  # install the cookie-aware opener globally

    def GetServerTime(self):
        "Get server time and nonce, which are used to encode the password."
        # As in digest authentication, the server hands the client a random number
        # (the "nonce"); the client then runs the user name, password, nonce and
        # other fields through a one-way encryption step to produce the message it posts.
        print "Getting server time and nonce..."
        serverData = urllib2.urlopen(self.serverUrl).read()  # fetch the prelogin response
        print serverData
        try:
            serverTime, nonce, pubkey, rsakv = WeiboSearch.sServerData(serverData)  # parse out serverTime, nonce etc.
            return serverTime, nonce, pubkey, rsakv
        except:
            print 'Get server time & nonce error!'
            return None

def fetch_weibo(id, filename):
    "Fetch a user's Weibo posts without the API; only the first few are returned. Arguments: user ID, output file name."
    target = open(filename, 'a')
    myurl = 'http://weibo.com/u/' + id
    line = urllib2.urlopen(myurl).read()
    target.write(line)
    if re.search(r'"WB_detail', line):
        print "success"
    p = re.compile(r'"WB_detail"')
    linelist = p.split(line)
    for fraction in linelist:
        matchObj = re.search(r'nick-name=".+?">\n +(.+?)<', fraction)
        if matchObj:
            target.write(matchObj.group(1))
            target.write(" ")

def fetchqueryresult():
    "Fetch the result pages of a Weibo 'find people' query."
    myurl = "http://s.weibo.com/user/&auth=ord&age=22y&gender=women&region=custom:33:1&page="  # URL of the 'find people' page
    target = open("filename", 'a')  # output file name
    for i in range(37, 51):  # start and end page numbers
        line = urllib2.urlopen(myurl + str(i)).read()  # append the page number to the query URL
        while re.search(r'ids=(\d+?)\\', line):
            matchObj = re.search(r'ids=(\d+?)\\', line)
            print matchObj.group(1)
            target.write(matchObj.group(1))
            target.write(" ")
            p = re.compile(r'' + matchObj.group(1))
            linelist = p.split(line)
            line = linelist[len(linelist) - 1]  # keep scanning the text after the last occurrence
        print i
        time.sleep(2 + random.random())

def getjson():
    "Call the Weibo API and fetch the logged-in user's own Weibo posts."
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; rv:24.0) Gecko/20100101 Firefox/24.0'}  # request headers
    url = "https://api.weibo.com/2/statuses/user_timeline.json"
    your_param = {'source': '1675437817'}  # the request parameters
    result = requests.get(url, params=your_param, headers=headers)
    # requests appends the parameters to the URL; e.g. for http://s.weibo.com/weibo/s
    # the effect is http://s.weibo.com/weibo/s?Refer=sina_index
    result_final = result.text  # the body returned for that URL + parameters
    print result.text

if __name__ == '__main__':
    # When the Python interpreter runs this module (the source file) as the main
    # program, it sets the special __name__ variable to "__main__". When the file
    # is imported from another module, __name__ is set to the module's name instead.
    weiboLogin = WeiboLogin('tanglie23@163.com', 'XXXXXXXX')  # e-mail address (account) and password
    if weiboLogin.Login():
        print "Login successful!"
        myurl = "http://api.weibo.com/2/statuses/timeline_batch.json?source=1675437817&uids=5029941840"
        htmlContent = urllib2.urlopen(myurl).read()
        print htmlContent
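For reference, here is a minimal standalone sketch of the prelogin handshake that GetServerTime performs. It hits the same prelogin.php endpoint and strips the sinaSSOController.preloginCallBack(...) JSONP wrapper; the assumption is that the endpoint still answers in that format:

#coding=utf-8
# Minimal sketch of the prelogin step, assuming the JSONP response format above.
import re
import json
import urllib2

PRELOGIN_URL = ("http://login.sina.com.cn/sso/prelogin.php?"
                "entry=weibo&callback=sinaSSOController.preloginCallBack"
                "&su=&rsakt=mod&client=ssologin.js(v1.4.11)")

raw = urllib2.urlopen(PRELOGIN_URL).read()
payload = json.loads(re.search(r'\((.*)\)', raw).group(1))  # strip the JSONP wrapper
print "servertime:", payload['servertime']
print "nonce     :", payload['nonce']
print "rsakv     :", payload['rsakv']
print "pubkey    :", payload['pubkey'][:16], "..."  # hex RSA modulus, truncated for display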
It relies on two other modules, WeiboSearch.py and WeiboEncode.py.
The code of WeiboEncode.py is as follows:
#coding=utf-8
import urllib
import base64
import rsa
import binascii

def PostEncode(userName, passWord, serverTime, nonce, pubkey, rsakv):
    "Used to generate POST data."
    encodedUserName = GetUserName(userName)  # the user name is base64-encoded
    encodedPassWord = get_pwd(passWord, serverTime, nonce, pubkey)  # the password is currently RSA-encrypted
    postPara = {
        'entry': 'weibo',
        'gateway': '1',
        'from': '',
        'savestate': '7',
        'userticket': '1',
        'ssosimplelogin': '1',
        'vsnf': '1',
        'vsnval': '',
        'su': encodedUserName,
        'service': 'miniblog',
        'servertime': serverTime,
        'nonce': nonce,
        'pwencode': 'rsa2',
        'sp': encodedPassWord,
        'encoding': 'UTF-8',
        'prelt': '115',
        'rsakv': rsakv,
        'url': 'http://weibo.com/ajaxlogin.php?framelogin=1&callback=parent.sinaSSOController.feedBackUrlCallBack',
        'returntype': 'META'
    }
    postData = urllib.urlencode(postPara)  # URL-encode the form body
    return postData

def GetUserName(userName):
    "Used to encode the user name."
    userNameTemp = urllib.quote(userName)
    userNameEncoded = base64.encodestring(userNameTemp)[:-1]
    return userNameEncoded

def get_pwd(password, servertime, nonce, pubkey):
    rsaPublickey = int(pubkey, 16)
    key = rsa.PublicKey(rsaPublickey, 65537)  # build the public key
    message = str(servertime) + '\t' + str(nonce) + '\n' + str(password)  # concatenate the plaintext as the login JS does
    passwd = rsa.encrypt(message, key)  # encrypt
    passwd = binascii.b2a_hex(passwd)  # convert the ciphertext to hex
    return passwd
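As a quick sanity check, WeiboEncode can be exercised on its own. The prelogin values below are made-up placeholders, and a throwaway RSA key pair stands in for Sina's real pubkey, so this only demonstrates the encoding path:

#coding=utf-8
# Hedged sanity check for WeiboEncode with fabricated prelogin values.
import rsa
import WeiboEncode

# Generate a throwaway key pair so get_pwd has a valid hex modulus to work with;
# in real use, pubkey and rsakv come from the prelogin response.
(pub, _priv) = rsa.newkeys(1024)
fakePubkey = '%x' % pub.n

postData = WeiboEncode.PostEncode('someone@example.com', 'secret',
                                  '1379834957', 'ABC123', fakePubkey, '1330428213')
print postData[:80], '...'  # a URL-encoded form body: su=..., sp=..., servertime=... etc.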
And here is WeiboSearch.py:
#coding=utf-8
import re
import json

def sServerData(serverData):
    "Search the server time & nonce from server data."
    p = re.compile(r'\((.*)\)')  # re.compile turns the regular expression into a regex object
    jsonData = p.search(serverData).group(1)  # grab the JSON inside the preloginCallBack(...) wrapper
    data = json.loads(jsonData)  # decode the encoded JSON back to the original data with json.loads()
    serverTime = str(data['servertime'])
    nonce = data['nonce']
    pubkey = data['pubkey']
    rsakv = data['rsakv']
    print "Server time is:", serverTime
    print "Nonce is:", nonce
    return serverTime, nonce, pubkey, rsakv

def sRedirectData(text):
    p = re.compile(r'location\.replace\([\'"](.*?)[\'"]\)')
    loginUrl = p.search(text).group(1)
    print 'loginUrl:', loginUrl
    return loginUrl
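sRedirectData simply pulls the target URL out of the location.replace(...) call in the page the login POST returns. A fabricated reply illustrates it (the ticket value is a placeholder):

#coding=utf-8
# Illustrating sRedirectData on a fabricated server reply.
import WeiboSearch

sampleReply = ('<script language="javascript">'
               'location.replace("http://weibo.com/ajaxlogin.php?retcode=0&ticket=ST-FAKE");'
               '</script>')
print WeiboSearch.sRedirectData(sampleReply)  # -> http://weibo.com/ajaxlogin.php?retcode=0&ticket=ST-FAKE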
At present the crawler can log in automatically and call Sina Weibo's ordinary APIs. Fetching other users' Weibo posts in bulk, however, requires advanced API authorization, which is still being applied for.
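Once that authorization comes through, the timeline_batch endpoint would be called with an OAuth2 access token rather than the bare source parameter. A minimal sketch, assuming a token obtained from Weibo's OAuth2 flow (ACCESS_TOKEN below is a placeholder):

#coding=utf-8
# Hedged sketch: calling timeline_batch with an OAuth2 access token.
import requests

ACCESS_TOKEN = 'YOUR_ACCESS_TOKEN'  # placeholder: obtained via Weibo's OAuth2 flow
result = requests.get('https://api.weibo.com/2/statuses/timeline_batch.json',
                      params={'access_token': ACCESS_TOKEN, 'uids': '5029941840'})
print result.text  # on success, JSON with a "statuses" list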