安裝依賴
composer require guzzlehttp/guzzle:*
如果不想使用 guzzlehttp，也可以自行用 curl 實作，只要能取得網頁的 HTML 原始碼即可
核心原始碼
<?php
namespace xfstu\http;
use GuzzleHttp\Client;
/**
 * Fetches a web page and extracts its <title> text and favicon.
 *
 * getTitle() and getFavicon() accept an optional, already-downloaded $html
 * string so that a page does not have to be fetched twice.
 */
class titleFavicon
{
    /**
     * Download $url with browser-like request headers and return the body.
     *
     * @param string $url Absolute http(s) URL.
     * @return string Response body.
     * @throws \GuzzleHttp\Exception\GuzzleException When Guzzle reports a transport
     *         error or (with its default http_errors option) a 4xx/5xx response.
     */
    private function httpGet($url)
    {
        $client = new Client([
            'headers' => [
                'User-Agent' => 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:123.0) Gecko/20100101 Firefox/123.0',
                'Accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8',
                'Accept-Language' => 'zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2',
                'Accept-Encoding' => 'gzip, deflate, br',
                'Connection' => 'keep-alive',
                'Upgrade-Insecure-Requests' => '1',
                'Sec-Fetch-Dest' => 'document',
                'Sec-Fetch-Mode' => 'navigate',
                'Sec-Fetch-Site' => 'same-origin',
                'Sec-Fetch-User' => '?1',
                'Pragma' => 'no-cache',
                'Cache-Control' => 'no-cache'
            ]
        ]);
        $response = $client->get($url);
        return $response->getBody()->getContents();
    }

    /**
     * Return the text of the page's <title> element.
     *
     * When no <title> element is found, the first 100 characters of the
     * document are returned instead.
     *
     * @param string      $url  Page URL; only fetched when $html is null.
     * @param string|null $html Optional pre-fetched page source.
     * @return string
     */
    public function getTitle($url, $html = null)
    {
        if ($html === null) {
            $html = $this->httpGet($url);
        }
        // "s" lets "." span line breaks so a title wrapped over several lines
        // still matches; "\b" keeps longer tag names from matching.
        if (preg_match('/<title\b[^>]*>\s*(.*?)\s*<\/title>/is', $html, $matches) === 1) {
            return $matches[1];
        }
        return mb_substr($html, 0, 100, 'utf-8');
    }

    /**
     * Locate the page's favicon and optionally download it.
     *
     * The icon is taken from the first <link> in <head> whose rel attribute
     * contains the token "icon"; when there is none (or its href is not an
     * http(s) URL) "<origin>/favicon.ico" is used. If the document has no
     * <head>...</head> section, both result fields stay null.
     *
     * @param string      $url      Page URL.
     * @param int         $iconType 0: only resolve the icon URL, 1: also download and save it.
     * @param string      $path     Directory the icon is saved into (as "<host>.ico").
     * @param string|null $html     Optional pre-fetched page source.
     * @return array{url: ?string, path: ?string} 'path' is null when nothing was saved.
     */
    public function getFavicon($url, $iconType = 1, $path = './', $html = null)
    {
        $res = [
            'url' => null,
            'path' => null
        ];
        if ($html === null) {
            $html = $this->httpGet($url);
        }
        // "\b" stops "<header>" from being mistaken for "<head>".
        if (preg_match('/<head\b[^>]*>(.*?)<\/head>/is', $html, $head) !== 1 || $head[1] === '') {
            return $res;
        }
        $base = parse_url($url);
        if (!is_array($base) || empty($base['scheme']) || empty($base['host'])) {
            return $res;
        }

        $iconUrl = null;
        $href = $this->findIconHref($head[1]);
        if ($href !== null) {
            $iconUrl = $this->resolveIconUrl($href, $base);
        }
        if ($iconUrl === null) {
            // No usable <link rel="icon">: fall back to the conventional location.
            $iconUrl = $base['scheme'] . '://' . $this->authorityOf($base) . '/favicon.ico';
        }
        $res['url'] = $iconUrl;

        // Loose comparison on purpose: callers may pass "1" from request input.
        if ($iconType == 1) {
            $res['path'] = $this->downloadIcon($iconUrl, $base['host'], $path);
        }
        return $res;
    }

    /**
     * Return the href of the first <link> whose rel contains the token "icon".
     *
     * Attribute order inside the tag does not matter. Returns null when no
     * such link with a non-empty quoted href exists.
     */
    private function findIconHref($head)
    {
        if (!preg_match_all('/<link\b[^>]*>/i', $head, $tags)) {
            return null;
        }
        foreach ($tags[0] as $tag) {
            // The look-behind skips attributes such as data-rel / data-href.
            if (preg_match('/(?<![\w-])rel\s*=\s*(["\'])(.*?)\1/is', $tag, $rel) !== 1) {
                continue;
            }
            $tokens = preg_split('/\s+/', strtolower($rel[2]), -1, PREG_SPLIT_NO_EMPTY);
            if (!in_array('icon', $tokens, true)) {
                continue;
            }
            if (preg_match('/(?<![\w-])href\s*=\s*(["\'])(.*?)\1/is', $tag, $hrefMatch) !== 1) {
                continue;
            }
            $href = trim(html_entity_decode($hrefMatch[2], ENT_QUOTES | ENT_HTML5, 'UTF-8'));
            if ($href !== '') {
                return $href;
            }
        }
        return null;
    }

    /**
     * Turn an icon href (absolute, protocol-relative, root-relative or
     * document-relative) into an absolute http(s) URL.
     *
     * @param string $href Raw href value.
     * @param array  $base parse_url() result of the page URL (scheme and host set).
     * @return string|null Null when the href cannot be parsed or is not http(s), e.g. a data: URI.
     */
    private function resolveIconUrl($href, array $base)
    {
        $parts = parse_url($href);
        if (!is_array($parts)) {
            return null;
        }
        $scheme = strtolower(isset($parts['scheme']) ? $parts['scheme'] : $base['scheme']);
        if ($scheme !== 'http' && $scheme !== 'https') {
            return null;
        }
        $iconPath = isset($parts['path']) ? $parts['path'] : '/';
        if (isset($parts['host'])) {
            $authority = $this->authorityOf($parts);
        } else {
            $authority = $this->authorityOf($base);
            if ($iconPath === '' || $iconPath[0] !== '/') {
                // Document-relative href: resolve against the page's directory.
                $basePath = isset($base['path']) ? $base['path'] : '/';
                $iconPath = substr($basePath, 0, (int) strrpos($basePath, '/') + 1) . $iconPath;
            }
        }
        $iconUrl = $scheme . '://' . $authority . $this->normalizePath($iconPath);
        if (isset($parts['query'])) {
            $iconUrl .= '?' . $parts['query'];
        }
        return $iconUrl;
    }

    /**
     * Build "host" or "host:port" from a parse_url() result.
     */
    private function authorityOf(array $parts)
    {
        return $parts['host'] . (isset($parts['port']) ? ':' . $parts['port'] : '');
    }

    /**
     * Remove "." and ".." segments and guarantee a single leading slash.
     */
    private function normalizePath($path)
    {
        $segments = [];
        foreach (explode('/', ltrim($path, '/')) as $segment) {
            if ($segment === '..') {
                array_pop($segments);
            } elseif ($segment !== '.') {
                $segments[] = $segment;
            }
        }
        return '/' . implode('/', $segments);
    }

    /**
     * Download the icon and store it as "<host>.ico" inside $path.
     *
     * Best effort: a failed request, an empty body or a failed write all
     * yield null instead of an error.
     *
     * @return string|null Path of the written file, or null when nothing was saved.
     */
    private function downloadIcon($iconUrl, $host, $path)
    {
        try {
            $data = $this->httpGet($iconUrl);
        } catch (\GuzzleHttp\Exception\GuzzleException $e) {
            return null;
        }
        if ($data === '') {
            return null;
        }
        $path = (string) $path;
        // Tolerate directories given without a trailing slash; an empty
        // string keeps meaning "current working directory".
        $directory = $path === '' ? '' : rtrim($path, '/\\') . '/';
        $target = $directory . $host . '.ico';
        if (file_put_contents($target, $data) === false) {
            return null;
        }
        return $target;
    }

    /**
     * Fetch a page once and return its title together with its favicon.
     *
     * @param string $url      Page URL.
     * @param int    $iconType 0: only resolve the icon URL, 1: download the icon and save it.
     * @param string $path     Directory the icon is saved into.
     * @return array{title: string, url: ?string, path: ?string}
     */
    public static function getInfo($url, $iconType = 1, $path = './')
    {
        $obj = new self();
        $html = $obj->httpGet($url);
        $title = $obj->getTitle($url, $html);
        $icon = $obj->getFavicon($url, $iconType, $path, $html);
        return array_merge(['title' => $title], $icon);
    }
}
使用範例
// Fetch the title and favicon of the page; the icon is saved into the current directory.
$info = titleFavicon::getInfo('https://www.baidu.com', 1, './');
dump($info);
array(3) {
["title"] => string(27) "百度一下,你就知道"
["url"] => string(33) "https://www.baidu.com/favicon.ico"
["path"] => string(19) "./www.baidu.com.ico"
}