【小專案】爬取上海票據交易所資料並寫入資料庫

sevieryang發表於2020-12-17

爬取上海票據交易所資料並寫入資料庫

I. 目標資料

在這裡插入圖片描述
在這裡插入圖片描述

II. 程式碼


#!/usr/bin/env python
#-- coding:utf-8 --

import json
import random
from datetime import datetime
import urllib3
import requests
from Py_CRAW_workspace_cloud.Craw_Project_Doing.SHPJS.ToMybase.mysqldb import MysqldbHelper
import time

conn = MysqldbHelper(

    # 楊唯希
    host = '127.0.0.1', port = 3306, user = 'root', password = '252324', database = 'finance_data', charset = 'utf8'

    # 我的雲伺服器,已測通
    # host = '119.45.44.147', port = 3306, user = 'root', password = '252324', database = 'db', charset = 'utf8')

    # 費伺服器,OK
    # host = '139.129.99.23', port = 3306, user = 'test', password = '@1004cyfm', database = 'piaoju114admin', charset = 'utf8'
)


def drop(conn):
    sql = """
        drop table shpjs_test;
    """
    conn.cur.execute(sql)

# -- 初始化建立表,注意建立自增ID
def init(conn):

    sql = '''CREATE TABLE `shpjs_test` (
              `id` int NOT NULL PRIMARY KEY AUTO_INCREMENT,
              `sts_Date` date DEFAULT NULL,
              `ecds_MemNum` int DEFAULT NULL,
              `trx_MemNum` int DEFAULT NULL,
              `day_AcceptNum` int DEFAULT NULL,
              `day_AcceptAmount` int DEFAULT NULL,
              `day_DiscountNum` int DEFAULT NULL,
              `day_DiscountAmount` int DEFAULT NULL,
              `day_TradeNum` int DEFAULT NULL,
              `day_TradeAmount` int DEFAULT NULL,
              `year_AcceptNum` int DEFAULT NULL,
              `year_AcceptAmount` int DEFAULT NULL,
              `year_DiscountNum` int DEFAULT NULL,
              `year_DiscountAmount` int DEFAULT NULL,
              `year_TradeNum` int DEFAULT NULL,
              `year_TradeAmount` int DEFAULT NULL,
              `interestRate_6M` int DEFAULT NULL,
              `interestRate_1Y` int DEFAULT NULL
            )'''

    conn.create(sql)

# -- 向新建立的表中,插入資料
def update_detail(sts_Date,ecds_MemNum,trx_MemNum,day_AcceptNum,day_AcceptAmount,day_DiscountNum,day_DiscountAmount,day_TradeNum,day_TradeAmount):


    # 類似 insert into table values(欄位A的值,欄位B的值),這裡把值換成了變數,也就是INT


    _sts_Date = sts_Date.replace("年","-").replace("月","-").replace("日","")

    sql = "update shpjs_2020 set ecds_MemNum = " + str(ecds_MemNum) + "," + " trx_MemNum = " + str(trx_MemNum)+ "," + " day_AcceptNum = " + str(day_AcceptNum)+ "," + " day_AcceptAmount = " + str(day_AcceptAmount) + "," + " day_DiscountNum = " + str(day_DiscountNum) + "," + " day_DiscountAmount = " + str(day_DiscountAmount) + "," + " day_TradeNum = " + str(day_TradeNum) + "," + " day_TradeAmount = " + str(day_TradeAmount) + " where sts_Date ='" + _sts_Date + "';"


    conn.cur.execute(sql)
    conn.commit()

# -- 向新建立的表中,插入資料
def insert_into_detail(sts_Date,ecds_MemNum,trx_MemNum,day_AcceptNum,day_AcceptAmount,day_DiscountNum,day_DiscountAmount,day_TradeNum,day_TradeAmount):


# 類似 insert into table values(欄位A的值,欄位B的值),這裡把值換成了變數,也就是INT


    _sts_Date = sts_Date.replace("年","-").replace("月","-").replace("日","")

    sql = "INSERT INTO shpjs_2020(sts_Date,ecds_MemNum,trx_MemNum,day_AcceptNum,day_AcceptAmount,day_DiscountNum,day_DiscountAmount,day_TradeNum,day_TradeAmount) VALUES("+"\'"+_sts_Date+"\'"+","+str(ecds_MemNum)+","+str(trx_MemNum)+","+str(day_AcceptNum)+","+str(day_AcceptAmount)+","+str(day_DiscountNum)+","+str(day_DiscountAmount)+","+str(day_TradeNum)+","+str(day_TradeAmount)+");"
    conn.cur.execute(sql)
    conn.commit()


# 刪除表的內容
def del_shpjs_test():

    sql1 = "delete from shpjs_test;"
    sql2 = "ALTER TABLE shpjs_test AUTO_INCREMENT = 1;"
    conn.cur.execute(sql1)
    conn.cur.execute(sql2)
    conn.commit()
    print("del_shpjs_test()執行完畢")
    # conn.cur.close()
    # conn.con.close()



# 更新yield資料
def update_yield(_sts_Date, interestRate_6M,interestRate_1Y):

    sql = "update shpjs_2020 set interestRate_6M = " + str(interestRate_6M) + "," + " interestRate_1Y = " + str(interestRate_1Y) + " where sts_Date ='" + _sts_Date + "';"

    conn.cur.execute(sql)
    conn.commit()
    # print('insert:', interestRate_6M,interestRate_1Y)

# 插入yield資料
def insert_into_yield(_sts_Date, interestRate_6M,interestRate_1Y):

    sql = "insert into shpjs_test(sts_Date, interestRate_6M,interestRate_1Y) values('" + _sts_Date + "'," + str(interestRate_6M) + "," + str(interestRate_1Y) + ");"


    conn.cur.execute(sql)
    conn.commit()
    # print('insert:', interestRate_6M,interestRate_1Y)


# 專案執行
def run_detail():
    # 初始化資料庫表

    # init(conn)
    print("------------開始執行初始化資料------------")
    # conn.cur

    # 目標url
    url = "http://www.shcpe.com.cn/shcpe-cos/overview/detail"

    # 使用Cookie,跳過登陸操作
    headers = {
        "Cookie": "yikikata=15e2b6e1-e930dc0727e4349b9035044cd69ed7da; Hm_lvt_3a990a28b41506542951b1cb848a320b=1596696424,1597663655; _pk_ref.1.9730=%5B%22%22%2C%22%22%2C1608079677%2C%22https%3A%2F%2Fwww.baidu.com%2Flink%3Furl%3DoXGVLW72P7eCvHE2N7tjcvipB_bDUDhx2iT7wjA5d5a_WTj6IL3vw4V-_7i2Xjhh%26wd%3D%26eqid%3Ddafc4c75000943c9000000045fab96d1%22%5D; _pk_ses.1.9730=1; _pk_id.1.9730=b02f320de122464e.1601275883.83.1608086140.1608079677.",
        "Host": "www.shcpe.com.cn",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3861.400 QQBrowser/10.7.4313.400",
        "Accept-Encoding": "gzip, deflate",
        "Accept-Language": "zh-CN,zh;q=0.9",
        "Connection": "keep-alive",
        "Content-Length": "109",
        "Content-Type": "application/json;charset=UTF-8",
        "Origin": "http://www.shcpe.com.cn",
        "Cache-Control": "max-age=0"
    }


    time_now_start = datetime.now().strftime(
        '%Y/%m/%d %H:%M:%S')

    print("------------開始抓資料------------")
    print("抓取開始,此時:", time_now_start)

    playload = {

        # requestNo的問題
        "requestNo": "1608087313321",
        "requestSystem": "shcpe-cos",
        
    }

    req = requests.post(url=url,  headers=headers, json=playload)
    print(req)
    req_json = req.json()
    print(req_json)

    # print(type(req_json)) # <class 'dict'>

    sts_Date = req_json['data']['marketOverviewHead']['stsDate']
    ecds_MemNum = req_json['data']['marketOverviewHead']['ecdsMemNum'] # ECDS系統參與者數
    trx_MemNum = req_json['data']['marketOverviewHead']['trxMemNum'] # 交易系統上線會員數

    day_AcceptNum = req_json['data']['marketOverviewDay']['dayAcceptNum'] # ECDS系統參與者數
    day_AcceptAmount = req_json['data']['marketOverviewDay']['dayAcceptAmount'] # ECDS系統參與者數
    day_DiscountNum = req_json['data']['marketOverviewDay']['dayDiscountNum'] # ECDS系統參與者數
    day_DiscountAmount = req_json['data']['marketOverviewDay']['dayDiscountAmount'] # ECDS系統參與者數
    day_TradeNum = req_json['data']['marketOverviewDay']['dayTradeNum'] # ECDS系統參與者數
    day_TradeAmount = req_json['data']['marketOverviewDay']['dayTradeAmount'] # ECDS系統參與者數

    year_AcceptNum = req_json['data']['marketOverviewYear']['yearAcceptNum'] # ECDS系統參與者數
    year_AcceptAmount = req_json['data']['marketOverviewYear']['yearAcceptAmount'] # ECDS系統參與者數
    year_DiscountNum = req_json['data']['marketOverviewYear']['yearDiscountNum'] # ECDS系統參與者數
    year_DiscountAmount = req_json['data']['marketOverviewYear']['yerDiscountAmount'] # ECDS系統參與者數
    year_TradeNum = req_json['data']['marketOverviewYear']['yearTradeNum'] # ECDS系統參與者數
    year_TradeAmount = req_json['data']['marketOverviewYear']['yearTradeAmount'] # ECDS系統參與者數


    # insert_into_detail(sts_Date,ecds_MemNum,trx_MemNum,day_AcceptNum,day_AcceptAmount,day_DiscountNum,day_DiscountAmount,day_TradeNum,day_TradeAmount)
    update_detail(sts_Date,ecds_MemNum,trx_MemNum,day_AcceptNum,day_AcceptAmount,day_DiscountNum,day_DiscountAmount,day_TradeNum,day_TradeAmount)

    print(sts_Date,ecds_MemNum,trx_MemNum,day_AcceptNum,day_AcceptAmount,day_DiscountNum,day_DiscountAmount,day_TradeNum,day_TradeAmount)

    # time.sleep(random.randint(1, 3))
    # time.sleep(10)
    # conn.cur.close()
    # conn.con.close()


# 爬yield
def run_yield():
    # 初始化資料庫表

    # init(conn)
    print("------------開始執行初始化資料------------")
    # conn.cur

    # 目標url
    url = "http://www.shcpe.com.cn/shcpe-cos/yield/mark"

    # 使用Cookie,跳過登陸操作
    headers = {
        "Cookie": "_pk_id.1.9730=0c12b92fba9b8422.1602731442.1.1602731493.1602731442.; _pk_ses.1.9730=1",
        "Host": "www.shcpe.com.cn",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:80.0) Gecko/20100101 Firefox/80.0",
        "Accept-Encoding": "gzip, deflate",
        "Accept-Language": "zh-CN,zh;q=0.9",
        "Connection": "keep-alive",
        "Content-Length": "109",
        "Content-Type": "application/json;charset=UTF-8",
        "Origin": "http://www.shcpe.com.cn",
        "Cache-Control": "max-age=0"
    }
#
#
    time_now_start = datetime.now().strftime(
        '%Y/%m/%d %H:%M:%S')
#
    print("------------開始抓資料------------")
    print("抓取yield開始,此時:", time_now_start)

    playload = {
        "curveCode": "001",
        "requestNo": "1602739497044",
        "requestSystem": "shcpe-cos"
        # "staticsTime" : "2020-12-10"

    }
#
    req = requests.post(url=url,  headers=headers, json=playload)
    req_json = req.json()
#
    # print(type(req_json)) # <class 'dict'>

    sts_Date = req_json['data']['stsDate']
    # 2020.12.09
    _sts_Date = sts_Date.replace('.','-')
    # print(_sts_Date)
    interestRate_6M = req_json['data']['dataList'][7]['interestRate'] # ECDS系統參與者數

    interestRate_1Y = req_json['data']['dataList'][13]['interestRate'] # ECDS系統參與者數


    # 更新用
    update_yield(_sts_Date, interestRate_6M, interestRate_1Y)

    # 插入用
    # insert_into_yield(_sts_Date, interestRate_6M, interestRate_1Y)

    print(_sts_Date, interestRate_6M, interestRate_1Y)




# 注意是插入函式還是更新函式

# run_detail()
run_yield()





III. 成功結果

在這裡插入圖片描述

相關文章