用 Python 寫一個指令碼:爬取指定網址上的所有圖片,並能根據取得的圖片網址進入該網址,再次擷取其中的圖片

大话人生發表於2024-04-28

程式碼:

import os
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin, urlparse

# Check whether a URL is absolute (has both a scheme and a host).
def is_valid_url(url):
    """Return True when ``url`` has both a scheme and a network location.

    Relative paths, scheme-relative URLs (``//host/path``) and
    non-hierarchical URLs such as ``data:`` or ``mailto:`` return False.
    """
    parts = urlparse(url)
    if not parts.netloc:
        return False
    return bool(parts.scheme)

# Check whether a URL points at an image.
def is_image_url(url, timeout=10):
    """Return True when a HEAD request reports an ``image/*`` Content-Type.

    Args:
        url: Absolute URL to probe.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled server.

    Returns:
        True if the response carries a Content-Type starting with
        ``image/`` (compared case-insensitively); False otherwise, including
        when the request fails or the header is absent.
    """
    try:
        # HEAD does not follow redirects by default, so ask for it explicitly;
        # otherwise the Content-Type inspected is that of the redirect
        # response rather than of the final resource.
        response = requests.head(url, allow_redirects=True, timeout=timeout)
    except requests.RequestException:
        return False

    # Default to '' so a missing header yields False rather than None.
    content_type = response.headers.get('Content-Type', '')
    return content_type.lower().startswith('image/')

# Write one downloaded image to disk.
def _save_image(content, url, download_folder):
    """Store ``content`` in ``download_folder`` under the name from ``url``'s path.

    The file name is the last path component of the URL, so query strings
    and fragments never end up in it. URLs whose path has no file name
    (for example ones ending in ``/``) are skipped, because opening the
    folder itself for writing would raise an OSError.
    """
    filename = os.path.basename(urlparse(url).path)
    if not filename:
        print(f"無法從URL取得檔名,已略過:{url}")
        return

    os.makedirs(download_folder, exist_ok=True)
    img_name = os.path.join(download_folder, filename)
    # The bytes are already in memory, so a failed download can never leave
    # an empty file behind.
    with open(img_name, 'wb') as f:
        f.write(content)
    print(f"圖片已下載:{img_name}")


# Recursively download every image reachable from the given URL.
def get_images_from_url(url, download_folder, visited=None, timeout=10):
    """Download all images found at ``url`` into ``download_folder``.

    The URL is fetched once. If the response is an image it is saved
    directly; otherwise the body is parsed as HTML and the function recurses
    into the ``src`` of every ``<img>`` tag. A ``src`` that turns out to be
    another HTML page is therefore crawled for images in turn.

    Args:
        url: Absolute URL of an image or an HTML page.
        download_folder: Directory that receives the downloaded files.
        visited: Set of URLs already processed; shared across the recursion
            so that cyclic links terminate and nothing is fetched twice.
            Callers normally leave this as None.
        timeout: Seconds to wait for each HTTP request.

    Returns:
        None. Progress and request errors are reported with ``print``.
    """
    if visited is None:
        visited = set()
    if url in visited or not is_valid_url(url):
        return
    visited.add(url)

    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        # A failure is confined to this URL; the caller's loop carries on
        # with the remaining images of the page.
        print(f"請求過程中發生錯誤:{e}")
        return

    # The GET response already says what was fetched, so no extra HEAD
    # request is needed to classify it.
    content_type = response.headers.get('Content-Type', '')
    if content_type.lower().startswith('image/'):
        _save_image(response.content, url, download_folder)
        return

    # Parse the HTML page and follow every image tag.
    soup = BeautifulSoup(response.text, 'html.parser')
    for img in soup.find_all('img'):
        src = img.get('src')
        if not src:
            continue
        # Resolve relative paths against the final (post-redirect) URL.
        # Non-HTTP results such as data: URIs are rejected by the validity
        # check at the top of the recursive call.
        get_images_from_url(
            urljoin(response.url, src), download_folder, visited, timeout
        )

# URL where crawling starts.
start_url = 'https://www.elitebabes.com/'

# Folder that receives the downloaded images.
download_folder = r'F:\jingguan\zhao'

# Only crawl when executed as a script, so importing this module for its
# helper functions does not trigger network or disk activity.
if __name__ == "__main__":
    # exist_ok=True creates the folder when missing without a separate
    # exists() check, which could race with another process.
    os.makedirs(download_folder, exist_ok=True)

    # Start the recursive image download.
    get_images_from_url(start_url, download_folder)
    print("圖片下載完成。")

相關文章