百度基木魚產品中心批次複製上傳

伟茂發表於2024-08-08
import pandas as pd
import requests


def fetch_page_data(page_num, timeout=30):
    """Request one page of the product list from the Baidu product API.

    The cookie, ``reqid``, ``userId``, ``siteId`` and ``uniqId`` values below
    are placeholders; replace them with the values from your own browser
    session before running.

    Args:
        page_num: Page number sent as ``pagination.pageNum`` (callers in this
            file start counting at 1).
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the script forever.

    Returns:
        The ``requests.Response`` of the POST request; the caller is
        responsible for checking the status and decoding the JSON body.
    """
    # Raw "name=value; name2=value2" string copied from the browser.
    cookies = "替換瀏覽器cookie"

    headers = {
        "Accept": "application/json, text/plain, */*",
        "Accept-Language": "zh-CN,zh;q=0.9",
        "Cache-Control": "no-cache",
        "Connection": "keep-alive",
        "Content-Type": "application/json;charset=UTF-8",
        # requests' ``cookies=`` argument needs a dict or CookieJar; a raw
        # cookie string has to be sent through the Cookie header instead.
        "Cookie": cookies,
        "Origin": "https://isite.baidu.com",
        "Pragma": "no-cache",
        "Referer": "https://isite.baidu.com/",
        "Sec-Fetch-Dest": "empty",
        "Sec-Fetch-Mode": "cors",
        "Sec-Fetch-Site": "same-site",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36",
        # These values contain literal double quotes, so the Python strings
        # are single-quoted.
        "sec-ch-ua": '"Not)A;Brand";v="99", "Google Chrome";v="127", "Chromium";v="127"',
        "sec-ch-ua-mobile": "?0",
        "sec-ch-ua-platform": '"Windows"',
    }

    params = {
        "reqid": "瀏覽器的reqid",
    }

    json_data = {
        "dataType": 0,
        "userId": "自己的資料",
        "siteId": "自己的資料",
        "pagination": {
            "pageNum": page_num,
            "pageSize": 12,
        },
        "uniqId": "自己的資料",
        "selectCateId": None,
    }

    return requests.post(
        "https://jzapi.baidu.com/mini/reset/pcsite/getProductList",
        params=params,
        headers=headers,
        json=json_data,
        timeout=timeout,
    )


def get_category_name_by_id(categories, category_id):
    """Return the ``name`` of the first category whose ``id`` matches.

    Args:
        categories: Iterable of mappings, each with ``"id"`` and ``"name"``.
        category_id: Identifier to look for.

    Returns:
        The matching category's name, or ``None`` when nothing matches.
    """
    matching_names = (
        entry["name"] for entry in categories if entry["id"] == category_id
    )
    # The generator is lazy, so scanning stops at the first hit.
    return next(matching_names, None)


def calculate_total_pages(total_items, items_per_page=12):
    """Return how many pages are needed to hold ``total_items`` entries.

    Args:
        total_items: Total number of entries.
        items_per_page: Page capacity; defaults to 12.

    Returns:
        ``total_items`` divided by ``items_per_page``, rounded up.
    """
    # Adding (page size - 1) before flooring turns floor division into a
    # ceiling division.
    padded_total = total_items + items_per_page - 1
    page_count, _ = divmod(padded_total, items_per_page)
    return page_count


def _load_page_payload(page_num):
    """Fetch one page and return the ``data`` object of its JSON body."""
    response = fetch_page_data(page_num)
    # Fail with a clear HTTP error instead of a confusing KeyError or JSON
    # decoding error further down.
    response.raise_for_status()
    return response.json()["data"]


def _join_image_urls(product, group_index):
    """Join the URLs of one image group of a product with ``;``."""
    images = product["image"][group_index]["value"]
    return ";".join(img["url"] for img in images)


def _product_to_row(product, cate_list):
    """Convert one API product record into a spreadsheet row dict."""
    return {
        # The same title is written to both the short and long title columns.
        "產品短標題": product["productTitle"],
        "產品長標題": product["productTitle"],
        "產品分組": get_category_name_by_id(cate_list, product["categoryId"]),
        # Image group 0 is exported as the 4:3 pictures and group 1 as the
        # 1:1 pictures (assumed ordering of the API -- verify on real data).
        "圖片 - 橫版圖4:3": _join_image_urls(product, 0),
        "圖片 - 方形圖1:1": _join_image_urls(product, 1),
        # Only the 圖文 detail column is filled (from ``content``); the
        # other two detail columns are exported blank.
        "產品詳情-文字": "",
        "產品詳情-圖片": "",
        "產品詳情-圖文": product["content"],
        "賣點標籤": ";".join(product["productExt"]["tags"]),
    }


def main():
    """Download every product page and export the products to an Excel file.

    The first page is fetched to learn ``totalNum``; its payload is reused
    rather than requested a second time. Every product becomes one row of
    ``商品匯出.xlsx``, written to the current working directory.

    Raises:
        requests.HTTPError: If the API answers with an error status.
    """
    columns = [
        "產品短標題",
        "產品長標題",
        "產品分組",
        "圖片 - 橫版圖4:3",
        "圖片 - 方形圖1:1",
        "產品詳情-文字",
        "產品詳情-圖片",
        "產品詳情-圖文",
        "賣點標籤",
    ]

    first_payload = _load_page_payload(1)
    total_pages = calculate_total_pages(first_payload["totalNum"])

    rows = []
    for page_num in range(1, total_pages + 1):
        # Page 1 is already in memory; only later pages need a request.
        payload = first_payload if page_num == 1 else _load_page_payload(page_num)
        # Each product's category is resolved against its own page's list.
        cate_list = payload["cateList"]
        rows.extend(
            _product_to_row(product, cate_list) for product in payload["list"]
        )

    # Passing ``columns`` fixes the column order and keeps the header row
    # even when there are no products at all.
    df = pd.DataFrame(rows, columns=columns)
    df.to_excel("商品匯出.xlsx", index=False)


# Run the export only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

相關文章