【小專案】爬取上海票據交易所資料並寫入資料庫
I. 目標資料
II. 程式碼
#!/usr/bin/env python
#-- coding:utf-8 --
import json
import random
from datetime import datetime
import urllib3
import requests
from Py_CRAW_workspace_cloud.Craw_Project_Doing.SHPJS.ToMybase.mysqldb import MysqldbHelper
import time
conn = MysqldbHelper(
# 楊唯希
host = '127.0.0.1', port = 3306, user = 'root', password = '252324', database = 'finance_data', charset = 'utf8'
# 我的雲伺服器,已測通
# host = '119.45.44.147', port = 3306, user = 'root', password = '252324', database = 'db', charset = 'utf8')
# 費伺服器,OK
# host = '139.129.99.23', port = 3306, user = 'test', password = '@1004cyfm', database = 'piaoju114admin', charset = 'utf8'
)
def drop(conn):
sql = """
drop table shpjs_test;
"""
conn.cur.execute(sql)
# -- 初始化建立表,注意建立自增ID
def init(conn):
sql = '''CREATE TABLE `shpjs_test` (
`id` int NOT NULL PRIMARY KEY AUTO_INCREMENT,
`sts_Date` date DEFAULT NULL,
`ecds_MemNum` int DEFAULT NULL,
`trx_MemNum` int DEFAULT NULL,
`day_AcceptNum` int DEFAULT NULL,
`day_AcceptAmount` int DEFAULT NULL,
`day_DiscountNum` int DEFAULT NULL,
`day_DiscountAmount` int DEFAULT NULL,
`day_TradeNum` int DEFAULT NULL,
`day_TradeAmount` int DEFAULT NULL,
`year_AcceptNum` int DEFAULT NULL,
`year_AcceptAmount` int DEFAULT NULL,
`year_DiscountNum` int DEFAULT NULL,
`year_DiscountAmount` int DEFAULT NULL,
`year_TradeNum` int DEFAULT NULL,
`year_TradeAmount` int DEFAULT NULL,
`interestRate_6M` int DEFAULT NULL,
`interestRate_1Y` int DEFAULT NULL
)'''
conn.create(sql)
# -- 向新建立的表中,插入資料
def update_detail(sts_Date,ecds_MemNum,trx_MemNum,day_AcceptNum,day_AcceptAmount,day_DiscountNum,day_DiscountAmount,day_TradeNum,day_TradeAmount):
# 類似 insert into table values(欄位A的值,欄位B的值),這裡把值換成了變數,也就是INT
_sts_Date = sts_Date.replace("年","-").replace("月","-").replace("日","")
sql = "update shpjs_2020 set ecds_MemNum = " + str(ecds_MemNum) + "," + " trx_MemNum = " + str(trx_MemNum)+ "," + " day_AcceptNum = " + str(day_AcceptNum)+ "," + " day_AcceptAmount = " + str(day_AcceptAmount) + "," + " day_DiscountNum = " + str(day_DiscountNum) + "," + " day_DiscountAmount = " + str(day_DiscountAmount) + "," + " day_TradeNum = " + str(day_TradeNum) + "," + " day_TradeAmount = " + str(day_TradeAmount) + " where sts_Date ='" + _sts_Date + "';"
conn.cur.execute(sql)
conn.commit()
# -- 向新建立的表中,插入資料
def insert_into_detail(sts_Date,ecds_MemNum,trx_MemNum,day_AcceptNum,day_AcceptAmount,day_DiscountNum,day_DiscountAmount,day_TradeNum,day_TradeAmount):
# 類似 insert into table values(欄位A的值,欄位B的值),這裡把值換成了變數,也就是INT
_sts_Date = sts_Date.replace("年","-").replace("月","-").replace("日","")
sql = "INSERT INTO shpjs_2020(sts_Date,ecds_MemNum,trx_MemNum,day_AcceptNum,day_AcceptAmount,day_DiscountNum,day_DiscountAmount,day_TradeNum,day_TradeAmount) VALUES("+"\'"+_sts_Date+"\'"+","+str(ecds_MemNum)+","+str(trx_MemNum)+","+str(day_AcceptNum)+","+str(day_AcceptAmount)+","+str(day_DiscountNum)+","+str(day_DiscountAmount)+","+str(day_TradeNum)+","+str(day_TradeAmount)+");"
conn.cur.execute(sql)
conn.commit()
# 刪除表的內容
def del_shpjs_test():
sql1 = "delete from shpjs_test;"
sql2 = "ALTER TABLE shpjs_test AUTO_INCREMENT = 1;"
conn.cur.execute(sql1)
conn.cur.execute(sql2)
conn.commit()
print("del_shpjs_test()執行完畢")
# conn.cur.close()
# conn.con.close()
# 更新yield資料
def update_yield(_sts_Date, interestRate_6M,interestRate_1Y):
sql = "update shpjs_2020 set interestRate_6M = " + str(interestRate_6M) + "," + " interestRate_1Y = " + str(interestRate_1Y) + " where sts_Date ='" + _sts_Date + "';"
conn.cur.execute(sql)
conn.commit()
# print('insert:', interestRate_6M,interestRate_1Y)
# 插入yield資料
def insert_into_yield(_sts_Date, interestRate_6M,interestRate_1Y):
sql = "insert into shpjs_test(sts_Date, interestRate_6M,interestRate_1Y) values('" + _sts_Date + "'," + str(interestRate_6M) + "," + str(interestRate_1Y) + ");"
conn.cur.execute(sql)
conn.commit()
# print('insert:', interestRate_6M,interestRate_1Y)
# 專案執行
def run_detail():
# 初始化資料庫表
# init(conn)
print("------------開始執行初始化資料------------")
# conn.cur
# 目標url
url = "http://www.shcpe.com.cn/shcpe-cos/overview/detail"
# 使用Cookie,跳過登陸操作
headers = {
"Cookie": "yikikata=15e2b6e1-e930dc0727e4349b9035044cd69ed7da; Hm_lvt_3a990a28b41506542951b1cb848a320b=1596696424,1597663655; _pk_ref.1.9730=%5B%22%22%2C%22%22%2C1608079677%2C%22https%3A%2F%2Fwww.baidu.com%2Flink%3Furl%3DoXGVLW72P7eCvHE2N7tjcvipB_bDUDhx2iT7wjA5d5a_WTj6IL3vw4V-_7i2Xjhh%26wd%3D%26eqid%3Ddafc4c75000943c9000000045fab96d1%22%5D; _pk_ses.1.9730=1; _pk_id.1.9730=b02f320de122464e.1601275883.83.1608086140.1608079677.",
"Host": "www.shcpe.com.cn",
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3861.400 QQBrowser/10.7.4313.400",
"Accept-Encoding": "gzip, deflate",
"Accept-Language": "zh-CN,zh;q=0.9",
"Connection": "keep-alive",
"Content-Length": "109",
"Content-Type": "application/json;charset=UTF-8",
"Origin": "http://www.shcpe.com.cn",
"Cache-Control": "max-age=0"
}
time_now_start = datetime.now().strftime(
'%Y/%m/%d %H:%M:%S')
print("------------開始抓資料------------")
print("抓取開始,此時:", time_now_start)
playload = {
# requestNo的問題
"requestNo": "1608087313321",
"requestSystem": "shcpe-cos",
}
req = requests.post(url=url, headers=headers, json=playload)
print(req)
req_json = req.json()
print(req_json)
# print(type(req_json)) # <class 'dict'>
sts_Date = req_json['data']['marketOverviewHead']['stsDate']
ecds_MemNum = req_json['data']['marketOverviewHead']['ecdsMemNum'] # ECDS系統參與者數
trx_MemNum = req_json['data']['marketOverviewHead']['trxMemNum'] # 交易系統上線會員數
day_AcceptNum = req_json['data']['marketOverviewDay']['dayAcceptNum'] # ECDS系統參與者數
day_AcceptAmount = req_json['data']['marketOverviewDay']['dayAcceptAmount'] # ECDS系統參與者數
day_DiscountNum = req_json['data']['marketOverviewDay']['dayDiscountNum'] # ECDS系統參與者數
day_DiscountAmount = req_json['data']['marketOverviewDay']['dayDiscountAmount'] # ECDS系統參與者數
day_TradeNum = req_json['data']['marketOverviewDay']['dayTradeNum'] # ECDS系統參與者數
day_TradeAmount = req_json['data']['marketOverviewDay']['dayTradeAmount'] # ECDS系統參與者數
year_AcceptNum = req_json['data']['marketOverviewYear']['yearAcceptNum'] # ECDS系統參與者數
year_AcceptAmount = req_json['data']['marketOverviewYear']['yearAcceptAmount'] # ECDS系統參與者數
year_DiscountNum = req_json['data']['marketOverviewYear']['yearDiscountNum'] # ECDS系統參與者數
year_DiscountAmount = req_json['data']['marketOverviewYear']['yerDiscountAmount'] # ECDS系統參與者數
year_TradeNum = req_json['data']['marketOverviewYear']['yearTradeNum'] # ECDS系統參與者數
year_TradeAmount = req_json['data']['marketOverviewYear']['yearTradeAmount'] # ECDS系統參與者數
# insert_into_detail(sts_Date,ecds_MemNum,trx_MemNum,day_AcceptNum,day_AcceptAmount,day_DiscountNum,day_DiscountAmount,day_TradeNum,day_TradeAmount)
update_detail(sts_Date,ecds_MemNum,trx_MemNum,day_AcceptNum,day_AcceptAmount,day_DiscountNum,day_DiscountAmount,day_TradeNum,day_TradeAmount)
print(sts_Date,ecds_MemNum,trx_MemNum,day_AcceptNum,day_AcceptAmount,day_DiscountNum,day_DiscountAmount,day_TradeNum,day_TradeAmount)
# time.sleep(random.randint(1, 3))
# time.sleep(10)
# conn.cur.close()
# conn.con.close()
# 爬yield
def run_yield():
# 初始化資料庫表
# init(conn)
print("------------開始執行初始化資料------------")
# conn.cur
# 目標url
url = "http://www.shcpe.com.cn/shcpe-cos/yield/mark"
# 使用Cookie,跳過登陸操作
headers = {
"Cookie": "_pk_id.1.9730=0c12b92fba9b8422.1602731442.1.1602731493.1602731442.; _pk_ses.1.9730=1",
"Host": "www.shcpe.com.cn",
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:80.0) Gecko/20100101 Firefox/80.0",
"Accept-Encoding": "gzip, deflate",
"Accept-Language": "zh-CN,zh;q=0.9",
"Connection": "keep-alive",
"Content-Length": "109",
"Content-Type": "application/json;charset=UTF-8",
"Origin": "http://www.shcpe.com.cn",
"Cache-Control": "max-age=0"
}
#
#
time_now_start = datetime.now().strftime(
'%Y/%m/%d %H:%M:%S')
#
print("------------開始抓資料------------")
print("抓取yield開始,此時:", time_now_start)
playload = {
"curveCode": "001",
"requestNo": "1602739497044",
"requestSystem": "shcpe-cos"
# "staticsTime" : "2020-12-10"
}
#
req = requests.post(url=url, headers=headers, json=playload)
req_json = req.json()
#
# print(type(req_json)) # <class 'dict'>
sts_Date = req_json['data']['stsDate']
# 2020.12.09
_sts_Date = sts_Date.replace('.','-')
# print(_sts_Date)
interestRate_6M = req_json['data']['dataList'][7]['interestRate'] # ECDS系統參與者數
interestRate_1Y = req_json['data']['dataList'][13]['interestRate'] # ECDS系統參與者數
# 更新用
update_yield(_sts_Date, interestRate_6M, interestRate_1Y)
# 插入用
# insert_into_yield(_sts_Date, interestRate_6M, interestRate_1Y)
print(_sts_Date, interestRate_6M, interestRate_1Y)
# 注意是插入函式還是更新函式
# run_detail()
run_yield()
III. 成功結果
相關文章
- python爬取股票資料並存到資料庫Python資料庫
- 創新專案實訓:資料爬取
- 爬取 Boss 直聘網上海區域 PHP 職位資訊資料並分析PHP
- python爬蟲簡歷專案怎麼寫_爬蟲專案咋寫,爬取什麼樣的資料可以作為專案寫在簡歷上?...Python爬蟲
- Python3爬蟲資料入資料庫---把爬取到的資料存到資料庫,帶資料庫去重功能Python爬蟲資料庫
- 如何將 EXCEL 資料寫入資料庫Excel資料庫
- 房產資料爬取、智慧財產權資料爬取、企業工商資料爬取、抖音直播間資料python爬蟲爬取Python爬蟲
- 爬取LeetCode資料,生成README檔案,美化GitHub倉庫LeetCodeGithub
- 爬蟲新手入門實戰專案(爬取筆趣閣小說並下載)爬蟲
- 快速爬取登入網站資料網站
- 企業資料爬蟲專案爬蟲
- golang讀取檔案的json資料流,並解析到struct,儲存到資料庫GolangJSONStruct資料庫
- python 爬蟲 5i5j房屋資訊 獲取並儲存到資料庫Python爬蟲資料庫
- Python爬取股票資訊,並實現視覺化資料Python視覺化
- python——將excel檔案寫入mysql資料庫中PythonExcelMySql資料庫
- Python 利用pandas和mysql-connector獲取Excel資料寫入到MySQL資料庫PythonMySqlExcel資料庫
- 從cmd中匯入.SQL檔案並建立資料庫SQL資料庫
- 【python】爬取疫情資料並進行視覺化Python視覺化
- gin框架,讀取檔案的json資料流,並解析到struct,儲存到資料庫框架JSONStruct資料庫
- 資料分析專案(一)——爬蟲篇爬蟲
- 企業資料爬蟲專案(二)爬蟲
- 成功使用Eclipse匯入專案,並實驗Mybatis操作了資料庫內容。EclipseMyBatis資料庫
- python爬蟲小專案--飛常準航班資訊爬取variflight(上)Python爬蟲
- 使用scrapy框架把資料非同步寫入資料庫框架非同步資料庫
- Python爬蟲入門【3】:美空網資料爬取Python爬蟲
- 客快物流大資料專案(五十一):資料庫表分析 物流專案 資料庫表設計大資料資料庫
- java資料list寫入檔案Java
- Python爬蟲之小說資訊爬取與資料視覺化分析Python爬蟲視覺化
- django建立專案案例1獲取並傳遞資料續04Django
- django基礎--02基於資料庫的小專案Django資料庫
- 用 ABAP 新建本地 Excel 檔案並寫入資料試讀版Excel
- 在Django中,多資料操作,你可以編寫測試來查詢另一個資料庫伺服器中的資料,並將結果匯入當前Django專案的資料庫表中Django資料庫伺服器
- 利用python編寫爬蟲爬取淘寶奶粉部分資料.1Python爬蟲
- Redis快取資料庫-快速入門Redis快取資料庫
- 大資料爬蟲專案實戰教程大資料爬蟲
- PySpider爬取去哪兒攻略資料專案IDE
- 優炫資料庫入圍中海油信創資料庫軟體採購專案資料庫
- 上海票據交易所:2018上半年票據市場執行情況