# 2788647047_py2

# 翎上 發表於 2024-08-16

import requests
import csv
from bs4 import BeautifulSoup

def _github_get_json(url, params):
    """Send a GET request to the GitHub API and return the decoded JSON body.

    Prints a failure message and returns None when the request raises, the
    status code is not 200, or the body is not valid JSON.
    """
    import os

    headers = {"Accept": "application/vnd.github+json"}
    # NOTE(review): GitHub's documentation describes the code-search endpoint
    # as requiring authentication; a token is sent only when GITHUB_TOKEN is
    # set, so behaviour without the variable is unchanged. Confirm the
    # variable name against the deployment environment.
    token = os.environ.get("GITHUB_TOKEN")
    if token:
        headers["Authorization"] = f"Bearer {token}"

    try:
        # ``params`` lets requests URL-encode the values; the timeout keeps a
        # stalled connection from hanging the whole run.
        response = requests.get(url, params=params, headers=headers, timeout=10)
    except requests.RequestException as exc:
        print("請求失敗:", exc)
        return None

    if response.status_code != 200:
        print("請求失敗:", response.status_code)
        return None

    try:
        return response.json()
    except ValueError as exc:
        print("請求失敗:", exc)
        return None


def get_domain_update_time(domain):
    """
    Get the date of the latest commit to the maltrail file mentioning a domain.

    The domain is searched for in the stamparm/maltrail repository through
    GitHub's code-search API. The first matching file's path is then used to
    ask the commits API for the most recent commit touching that file.

    Args:
        domain: Domain name to search for.

    Returns:
        The committer date string of that commit as reported by GitHub
        (presumably an ISO 8601 UTC timestamp - confirm against the API),
        the string "未找到" when the domain is empty or the search has no
        results, or None when a request fails.
    """
    api_root = "https://api.github.com"
    repo = "stamparm/maltrail"

    # Nothing meaningful can be searched for without a domain.
    if domain is None or not str(domain).strip():
        return "未找到"
    term = str(domain).strip()

    search = _github_get_json(
        f"{api_root}/search/code",
        {"q": f"{term} in:file repo:{repo}", "per_page": 1},
    )
    if search is None:
        return None

    items = search.get("items") or []
    if not search.get("total_count") or not items:
        return "未找到"

    # The "sha" of a code-search item identifies the file blob, not a commit,
    # so it cannot be passed to the commits endpoint. Query the commit list
    # filtered by the file's path instead and take the first entry.
    path = items[0].get("path")
    if not path:
        return None

    commits = _github_get_json(
        f"{api_root}/repos/{repo}/commits",
        {"path": path, "per_page": 1},
    )
    if not commits:
        return None
    return commits[0]["commit"]["committer"]["date"]

def get_domain_list(file_path):
    """
    Collect the text of every ``<a>`` element in an HTML file as domain names.

    Args:
        file_path: Path of the file to parse.

    Returns:
        A list with the whitespace-stripped text of each anchor that has any
        text, in document order. Duplicates are kept.
    """
    # Open in binary mode so BeautifulSoup detects the document's encoding
    # itself instead of depending on the platform's default text encoding.
    with open(file_path, "rb") as f:
        soup = BeautifulSoup(f, "html.parser")

    domains = []
    for anchor in soup.find_all("a"):
        # ``.string`` is None for anchors with mixed or nested content;
        # get_text() returns their combined text as a plain str.
        text = anchor.get_text(strip=True)
        if text:
            domains.append(text)
    return domains

def main(file_path):
    """
    Look up the update time of each domain in a file and save them as CSV.

    Domains are read with ``get_domain_list`` and resolved with
    ``get_domain_update_time``. The results are written to
    ``domain_update_times.csv`` (relative to the current working directory)
    with a header row followed by one row per distinct domain.

    Args:
        file_path: Path of the input file, passed to ``get_domain_list``.
    """
    domain_update_times = {}
    for domain in get_domain_list(file_path):
        # Skip empty entries, and do not repeat the network lookup for a
        # domain that has already been resolved.
        if not domain or domain in domain_update_times:
            continue
        domain_update_times[domain] = get_domain_update_time(domain)

    with open("domain_update_times.csv", "w", newline="", encoding="utf-8") as f:
        writer = csv.writer(f)
        writer.writerow(["域名", "更新時間"])
        writer.writerows(domain_update_times.items())

if __name__ == "__main__":
    import sys

    # Take the input file from the first command-line argument. Without one,
    # fall back to the placeholder, which must be replaced with the path of a
    # file from the trails directory of the Maltrail project.
    file_path = sys.argv[1] if len(sys.argv) > 1 else "path/to/maltrail/file"
    main(file_path)