非同步爬取畢業照

ゐ叶う枫ゆ 發表於 2024-06-30
import asyncio
import json
import os
import time

import aiofiles
import aiohttp

# Listing endpoint: main() POSTs the order id and page number here and reads
# the image entries from the JSON reply.
url_data = "https://www.yipai360.com/applet/v2/photo/select-page"

# Base URL for the image files; download_img() appends each entry's etag.
download_url = "https://c360-o2o.c360dn.com"

# Local directory the downloaded images are written to.
download_path = "H:/images"

# Request headers sent with every image download.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/119.0.0.0 Safari/537.36"
    ),
}


async def main(page, count):
    """Fetch one page of the photo listing and download every image on it.

    Posts the fixed order id and ``page`` to ``url_data``, reads the image
    entries from ``data.result`` of the JSON reply, and downloads them
    concurrently through ``download_img``.

    Args:
        page: Page number sent to the listing endpoint as ``pageNo``.
        count: Number used as the file name of the first image on this page;
            it is incremented by one for every listed entry.
    """
    data = {
        'orderId': '202406061436516915',
        'sortType': 'desc',
        'pageNo': page
    }
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(download_path, exist_ok=True)
    tasks = []
    async with aiohttp.ClientSession() as session:
        async with session.post(url_data, data=data) as response:
            if response.status == 200:
                result = await response.json()
                # "data" / "result" may be present but null, in which case
                # dict.get's default is not used; fall back explicitly.
                result_img_list = (result.get('data') or {}).get('result') or []
                for item in result_img_list:
                    name = item.get('etag')
                    if name:
                        tasks.append(download_img(session, name, count))
                    else:
                        # Without an etag the URL would end in "/None".
                        print(f"Skipping entry without etag on page {page}")
                    # Advance even for skipped entries so file numbers keep
                    # matching the entry's position in the listing.
                    count += 1
                print("休息3秒別爬了,已經爬完{}頁".format(page))
                # time.sleep() would block the event loop thread; the
                # awaitable sleep pauses only this coroutine.
                await asyncio.sleep(3)
            else:
                print(f"Failed to fetch data for page {page}, status {response.status}")
        # Run the downloads while the session is still open.
        await asyncio.gather(*tasks)


async def download_img(session, name, count):
    """Download one image and save it as ``<count>.jpg`` in ``download_path``.

    Args:
        session: Open aiohttp client session used for the GET request.
        name: Value appended to ``download_url`` to form the image URL.
        count: Number used as the base name of the saved file.

    Any exception raised while downloading or saving is caught, printed, and
    appended to ``error.txt`` in the current working directory.
    """
    try:
        url = f"{download_url}/{name}"
        async with session.get(url, headers=headers) as response:
            if response.status == 200:
                img_data = await response.read()
                # Build the full file name once so the log message below
                # reports the path that was actually written.
                file_path = os.path.join(download_path, f"{count}.jpg")
                async with aiofiles.open(file_path, 'wb') as file:
                    await file.write(img_data)
                print(f"Downloaded {url} to {file_path}")
            else:
                print(f"Failed to download {url}, status {response.status}")
    except Exception as e:
        print(f"Error downloading {name}: {e}")
        # Append (not overwrite) so earlier failures are kept, one per line.
        async with aiofiles.open("error.txt", 'a', encoding='utf-8') as file:
            await file.write(f"Error downloading {name}: {e}\n")


if __name__ == '__main__':
    # Pages 1 through 149 are processed one after another, each in its own
    # asyncio.run() call.
    for page in range(1, 150):
        # File numbering continues across pages; the factor 50 presumably
        # matches the listing's page size -- TODO confirm against the API.
        first_number = 50 * (page - 1) + 1
        asyncio.run(main(page, first_number))
        print("--------------------------新的開始------------------------------")

相關文章