程式碼:
import os
from urllib.parse import urljoin, urlparse

import requests
from bs4 import BeautifulSoup

# Seconds to wait on any single HTTP request before giving up; without a
# timeout, requests blocks indefinitely on an unresponsive server.
REQUEST_TIMEOUT = 10


def is_valid_url(url):
    """Return True if *url* has both a scheme and a network location."""
    parsed = urlparse(url)
    return bool(parsed.netloc) and bool(parsed.scheme)


def _is_image_content_type(content_type):
    """Return True if a Content-Type header value denotes an image."""
    # Media types are case-insensitive; a missing header counts as non-image.
    return bool(content_type) and content_type.strip().lower().startswith('image/')


def is_image_url(url):
    """Return True if a HEAD request to *url* reports an ``image/*`` type.

    Any request failure is treated as "not an image" and yields False.
    """
    try:
        # requests.head() does not follow redirects by default; follow them so
        # the Content-Type inspected belongs to the final resource.
        response = requests.head(
            url, timeout=REQUEST_TIMEOUT, allow_redirects=True
        )
    except requests.RequestException:
        return False
    return _is_image_content_type(response.headers.get('Content-Type'))


def _filename_from_url(url):
    """Derive a local file name from the path component of *url*."""
    # Use only the URL path so query strings and fragments never end up in
    # the file name; fall back to a fixed name when the path ends with '/'.
    name = os.path.basename(urlparse(url).path)
    return name or 'image'


def _save_image(content, url, download_folder):
    """Write the already-downloaded image bytes for *url* into *download_folder*."""
    os.makedirs(download_folder, exist_ok=True)
    img_name = os.path.join(download_folder, _filename_from_url(url))
    # The file is opened only after the download succeeded, so a failed
    # request never leaves an empty file behind.
    with open(img_name, 'wb') as f:
        f.write(content)
    print(f"圖片已下載:{img_name}")


def get_images_from_url(url, download_folder, _visited=None):
    """Recursively download every image reachable from *url*.

    If *url* itself is an image it is saved directly. Otherwise the response
    is parsed as HTML and each ``<img src=...>`` target is processed the same
    way.

    Args:
        url: Absolute URL of an image or of an HTML page to scan.
        download_folder: Directory the images are written into; it is
            created if it does not exist.
        _visited: Internal set of URLs already processed, used to stop
            cyclic references from recursing forever. Callers should omit it.

    Request errors and file-system errors are reported on stdout and do not
    propagate, so one failing resource does not abort the rest of the crawl.
    """
    if _visited is None:
        _visited = set()
    if not is_valid_url(url) or url in _visited:
        return
    _visited.add(url)

    try:
        response = requests.get(url, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"請求過程中發生錯誤:{e}")
        return

    # Classify using the response we already have instead of issuing an
    # additional HEAD request for the same URL.
    if _is_image_content_type(response.headers.get('Content-Type')):
        try:
            _save_image(response.content, url, download_folder)
        except OSError as e:
            print(f"儲存圖片時發生錯誤:{e}")
        return

    # Not an image: parse the HTML page and follow every <img> tag.
    soup = BeautifulSoup(response.text, 'html.parser')
    for img in soup.find_all('img'):
        src = img.get('src')
        if not src:
            continue
        # Resolve relative references against the page URL. Non-HTTP results
        # (e.g. data: URIs) are rejected by is_valid_url in the recursive call.
        get_images_from_url(urljoin(url, src), download_folder, _visited)


# URL to start crawling from.
start_url = 'https://www.elitebabes.com/'

# Folder that downloaded images are saved into.
download_folder = r'F:\jingguan\zhao'


def main():
    """Create the download folder if needed and crawl from ``start_url``."""
    os.makedirs(download_folder, exist_ok=True)
    get_images_from_url(start_url, download_folder)
    print("圖片下載完成。")


if __name__ == "__main__":
    main()