爬蟲 | 非同步請求aiohttp模組

abloger發表於2024-06-16

aiohttp模組,即建立在asyncio之上的非同步HTTP函式庫(async I/O + HTTP),可視為非同步版的requests

# 1. 建立一個物件
aioreq = aiohttp.ClientSession()  # 即對應非同步版的requests

# 2. 用這個非同步requests來傳送請求
resp = await aioreq.get(url)  # 必須await才能取得響應;更推薦寫成 async with aioreq.get(url) as resp:

# 3. 非同步寫入檔案,用到aiofiles模組,pip安裝,可學習:https://www.w3cschool.cn/article/86236403.html

async with aiofiles.open(...) as f:
  await f.write(data)  # data為要寫入的內容(bytes或str,取決於開啟模式)

"""需要注意的是,完成後需要close來釋放資源,最好的辦法是用with來幫助管理。
而非同步中規定必須在with前面加上async,即 async with..."""

案例示範:

import asyncio
import aiohttp
import aiofiles

"""整體思路就是:
1. 非同步傳送請求
2. 非同步儲存資訊
3. 非同步寫入文件
"""

async def download(url):
    """Fetch ``url`` with aiohttp and write the response body to disk.

    The file name is the last ``/``-separated segment of ``url``; the file is
    created inside the hard-coded target folder used in the ``aiofiles.open``
    call below.

    Args:
        url: Address of the resource to download.

    Raises:
        aiohttp.ClientResponseError: If the server answers with an HTTP
            status of 400 or higher.
    """
    # The file name depends only on the URL, so derive it before any I/O.
    name = url.split('/')[-1]

    # Recommended style: let ``async with`` manage the session and response
    # so both are cleaned up even when an error occurs.
    # 1. Send the request asynchronously; ClientSession plays the role that
    #    ``requests`` plays in synchronous code.
    async with aiohttp.ClientSession() as aioreq:
        # 2. Receive the response and read its body.
        async with aioreq.get(url) as resp:
            # Fail on 4xx/5xx so an error page is never saved as the file.
            resp.raise_for_status()
            r_content = await resp.content.read()
            # 3. Write the file asynchronously.
            async with aiofiles.open(rf"F:\非同步\{name}", mode="wb") as f:
                await f.write(r_content)

        print(f'{name}', '完成')

    # Alternative style (strongly discouraged): close everything by hand.
    # Forgetting a close, or an exception before it runs, can leak resources.
    #
    # aioreq = aiohttp.ClientSession()
    # resp = await aioreq.get(url)  # suspends until the response arrives
    # r_content = await resp.content.read()
    # name = url.split('/')[-1]
    # async with aiofiles.open(rf"F:\非同步\{name}", mode="wb") as f:
    #     await f.write(r_content)
    #
    # await resp.release()  # NOTE(review): the original author was told by an
    #                       # AI assistant that this call does not exist;
    #                       # verify against the aiohttp documentation.
    # await aioreq.close()


async def main():
    """Download every URL in the built-in list concurrently.

    One task running ``download`` is created per URL. Every download is
    attempted; a download that raises is reported on stdout instead of
    being dropped silently.
    """
    urls = [
        "https://i1.huishahe.com/uploads/tu/201911/9999/85c8311a57.jpg",
        "https://i1.huishahe.com/uploads/allimg/202205/9999/dd321482f1.jpg",
        "https://i1.huishahe.com/uploads/allimg/202205/9999/dd06ff32f2.jpg"
    ]

    tasks = [asyncio.create_task(download(url)) for url in urls]

    # gather(..., return_exceptions=True) waits for all tasks, like
    # asyncio.wait did, but returns each task's exception in order so a
    # failure can be reported rather than left unretrieved.
    outcomes = await asyncio.gather(*tasks, return_exceptions=True)
    for url, outcome in zip(urls, outcomes):
        if isinstance(outcome, BaseException):
            print(url, '失敗', repr(outcome))


# Run the downloads only when this file is executed as a script;
# asyncio.run drives the main() coroutine to completion.
if __name__ == "__main__":
    asyncio.run(main())

相關文章