PHP獲取網站標題和圖示

小枫同学發表於2024-03-10

安裝依賴

composer require guzzlehttp/guzzle:*

如果不想使用 guzzlehttp,也可以自行用 curl 實現 HTTP 請求,只要能取得網頁的 HTML 原始碼即可。

核心原始碼

<?php

namespace xfstu\http;

use GuzzleHttp\Client;

/**
 * Fetches a web page's <title> text and its favicon.
 *
 * The page is downloaded with browser-like request headers. The favicon URL
 * is taken from the first <link> element whose rel attribute contains the
 * "icon" token; when none is declared, "/favicon.ico" at the site origin is used.
 */
class titleFavicon
{
    /**
     * Performs a GET request with browser-like headers and returns the body.
     *
     * @param string $url Absolute URL to fetch.
     * @return string Raw response body.
     * @throws \GuzzleHttp\Exception\GuzzleException On transport errors, timeouts,
     *         and (Guzzle's default `http_errors` behaviour) 4xx/5xx responses.
     */
    private function httpGet($url)
    {
        $client = new Client([
            // Without explicit limits Guzzle waits indefinitely on a stalled server.
            'connect_timeout' => 10,
            'timeout' => 30,
            // No explicit Accept-Encoding header: with the default
            // `decode_content` option the handler advertises only the encodings
            // it can actually decode. Forcing "br" breaks on libcurl builds
            // that lack brotli support.
            'headers' => [
                'User-Agent' => 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:123.0) Gecko/20100101 Firefox/123.0',
                'Accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8',
                'Accept-Language' => 'zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2',
                'Connection' => 'keep-alive',
                'Upgrade-Insecure-Requests' => '1',
                'Sec-Fetch-Dest' => 'document',
                'Sec-Fetch-Mode' => 'navigate',
                'Sec-Fetch-Site' => 'same-origin',
                'Sec-Fetch-User' => '?1',
                'Pragma' => 'no-cache',
                'Cache-Control' => 'no-cache',
            ],
        ]);

        return $client->get($url)->getBody()->getContents();
    }

    /**
     * Returns the text of the page's <title> element.
     *
     * @param string      $url  Page URL; fetched only when $html is null.
     * @param string|null $html Already-downloaded page source, to avoid a second request.
     * @return string The title with surrounding whitespace removed. When the page
     *                has no <title> element, the first 100 characters of the
     *                page source are returned instead.
     * @throws \GuzzleHttp\Exception\GuzzleException When the page has to be fetched and the request fails.
     */
    public function getTitle($url, $html = null)
    {
        if ($html === null) {
            $html = $this->httpGet($url);
        }

        // "s" lets "." span newlines so titles formatted over several lines match;
        // "\b" keeps the pattern from matching other tags that merely start with "title".
        if (preg_match('/<title\b[^>]*>\s*(.*?)\s*<\/title>/is', $html, $title_matches) === 1) {
            return $title_matches[1];
        }

        return mb_substr($html, 0, 100, 'utf-8');
    }

    /**
     * Locates the page's favicon and optionally downloads it.
     *
     * @param string      $url      Page URL.
     * @param int         $iconType 0: only resolve the icon URL; 1: also download and save it.
     * @param string      $path     Directory the icon is saved into.
     * @param string|null $html     Already-downloaded page source, to avoid a second request.
     * @return array{url: ?string, path: ?string} "url" is the absolute icon URL
     *         (null only when $url has no scheme/host); "path" is the saved file
     *         (named "<host>.ico") or null when nothing was written.
     * @throws \GuzzleHttp\Exception\GuzzleException When the page has to be fetched and the request fails.
     */
    public function getFavicon($url, $iconType = 1, $path = './', $html = null)
    {
        $res = [
            'url' => null,
            'path' => null,
        ];

        if ($html === null) {
            $html = $this->httpGet($url);
        }

        $url_parts = parse_url($url);
        if (!is_array($url_parts) || empty($url_parts['scheme']) || empty($url_parts['host'])) {
            // Without a scheme and host there is no origin to resolve the icon against.
            return $res;
        }

        $icon_url = null;
        $href = $this->findIconHref($html);
        if ($href !== null) {
            $icon_url = $this->resolveUrl($href, $url_parts);
        }
        if ($icon_url === null) {
            // No usable <link rel="icon">: fall back to the conventional location.
            $icon_url = $this->originOf($url_parts) . '/favicon.ico';
        }
        $res['url'] = $icon_url;

        if ((int) $iconType === 1) {
            $icon_data = $this->download($icon_url);
            if ($icon_data !== null && $icon_data !== '') {
                // The file is always named "<host>.ico", whatever the real image format is.
                $dir = (string) $path;
                $last = substr($dir, -1);
                if ($dir !== '' && $last !== '/' && $last !== '\\') {
                    $dir .= '/';
                }
                $target = $dir . $url_parts['host'] . '.ico';
                // Report the path only when the file was actually written.
                if (file_put_contents($target, $icon_data) !== false) {
                    $res['path'] = $target;
                }
            }
        }

        return $res;
    }

    /**
     * Fetches the page title and favicon with a single page download.
     *
     * @param string $url      Page URL.
     * @param int    $iconType 0: only resolve the icon URL; 1: download and save the icon.
     * @param string $path     Directory the icon is saved into.
     * @return array{title: string, url: ?string, path: ?string}
     * @throws \GuzzleHttp\Exception\GuzzleException When the page request fails.
     */
    public static function getInfo($url, $iconType = 1, $path = './')
    {
        $obj = new self();
        // Download once and share the source between both extractors.
        $html = $obj->httpGet($url);

        return array_merge(
            ['title' => $obj->getTitle($url, $html)],
            $obj->getFavicon($url, $iconType, $path, $html)
        );
    }

    /**
     * Returns the href of the first <link> whose rel contains the "icon" token.
     *
     * @param string $html Page source.
     * @return string|null Decoded href, or null when no usable icon link exists.
     */
    private function findIconHref($html)
    {
        // Prefer the <head> section; fall back to the whole document when it is missing.
        $scope = preg_match('/<head\b[^>]*>(.*?)<\/head>/is', $html, $head) === 1 ? $head[1] : $html;

        if (!preg_match_all('/<link\b[^>]*>/i', $scope, $tags)) {
            return null;
        }

        foreach ($tags[0] as $tag) {
            $rel = $this->attribute($tag, 'rel');
            $href = $this->attribute($tag, 'href');
            if ($rel === null || $href === null) {
                continue;
            }

            // rel is a space-separated token list ("icon", "shortcut icon", ...).
            $tokens = preg_split('/\s+/', strtolower(trim($rel)), -1, PREG_SPLIT_NO_EMPTY);
            if (!in_array('icon', $tokens, true)) {
                continue;
            }

            // Attribute values are HTML-encoded ("&amp;" inside query strings).
            $href = trim(html_entity_decode($href, ENT_QUOTES, 'UTF-8'));
            // Inline data: URIs cannot be fetched over HTTP, so skip them.
            if ($href === '' || stripos($href, 'data:') === 0) {
                continue;
            }

            return $href;
        }

        return null;
    }

    /**
     * Extracts one attribute value from a single tag, in any attribute order.
     *
     * @param string $tag  Tag source, e.g. '<link rel="icon" href="/f.ico">'.
     * @param string $name Attribute name.
     * @return string|null Raw (still HTML-encoded) value, or null when absent.
     */
    private function attribute($tag, $name)
    {
        // The lookbehind rejects names that merely end with $name (e.g. "data-href").
        $pattern = '/(?<![\w-])' . preg_quote($name, '/')
            . '\s*=\s*(?:"([^"]*)"|\'([^\']*)\'|([^\s"\'>]+))/i';
        if (preg_match($pattern, $tag, $m) !== 1) {
            return null;
        }

        // Exactly one alternative matched: double-quoted, single-quoted or unquoted.
        foreach ([1, 2, 3] as $group) {
            if (isset($m[$group]) && $m[$group] !== '') {
                return $m[$group];
            }
        }

        return '';
    }

    /**
     * Resolves an href found in the page against the page URL.
     *
     * @param string $href Absolute, scheme-relative, root-relative or relative reference.
     * @param array  $base parse_url() result of the page URL (scheme and host present).
     * @return string|null Absolute http(s) URL, or null for non-http schemes.
     */
    private function resolveUrl($href, array $base)
    {
        if (preg_match('/^[a-z][a-z0-9+.\-]*:/i', $href) === 1) {
            // Already absolute; only http(s) targets can be downloaded.
            return preg_match('/^https?:\/\//i', $href) === 1 ? $href : null;
        }
        if (strpos($href, '//') === 0) {
            // Scheme-relative reference inherits the page's scheme.
            return $base['scheme'] . ':' . $href;
        }

        // Split the path from "?query#fragment"; the fragment is dropped.
        $suffix = '';
        $cut = strcspn($href, '?#');
        if ($cut < strlen($href)) {
            $suffix = substr($href, $cut);
            $href = substr($href, 0, $cut);
            $hash = strpos($suffix, '#');
            if ($hash !== false) {
                $suffix = substr($suffix, 0, $hash);
            }
        }

        $base_path = isset($base['path']) && $base['path'] !== '' ? $base['path'] : '/';
        if ($href === '') {
            $resolved = $base_path;
        } elseif ($href[0] === '/') {
            $resolved = $href;
        } else {
            // Relative to the directory of the current page.
            $slash = strrpos($base_path, '/');
            $dir = $slash === false ? '/' : substr($base_path, 0, $slash + 1);
            $resolved = $dir . $href;
        }

        return $this->originOf($base) . $this->removeDotSegments($resolved) . $suffix;
    }

    /**
     * Collapses "." and ".." segments of an absolute path.
     *
     * @param string $path Path starting with "/".
     * @return string Normalised path.
     */
    private function removeDotSegments($path)
    {
        $segments = explode('/', $path);
        $last = count($segments) - 1;
        $out = [];

        foreach ($segments as $i => $segment) {
            if ($segment === '.' || $segment === '..') {
                // Index 0 is the empty segment before the leading "/"; never pop it.
                if ($segment === '..' && count($out) > 1) {
                    array_pop($out);
                }
                if ($i === $last) {
                    // A trailing dot segment denotes a directory: keep the final "/".
                    $out[] = '';
                }
                continue;
            }
            $out[] = $segment;
        }

        return implode('/', $out);
    }

    /**
     * Builds "scheme://host[:port]" from parse_url() parts.
     *
     * @param array $parts parse_url() result containing scheme and host.
     * @return string Origin without a trailing slash.
     */
    private function originOf(array $parts)
    {
        $origin = $parts['scheme'] . '://' . $parts['host'];
        if (isset($parts['port'])) {
            $origin .= ':' . $parts['port'];
        }

        return $origin;
    }

    /**
     * Best-effort download: a missing or unreachable icon must not abort the lookup.
     *
     * @param string $url Absolute URL.
     * @return string|null Response body, or null when the request failed.
     */
    private function download($url)
    {
        try {
            return $this->httpGet($url);
        } catch (\GuzzleHttp\Exception\GuzzleException $e) {
            return null;
        } catch (\InvalidArgumentException $e) {
            // Raised for URLs the HTTP client cannot parse.
            return null;
        }
    }
}

使用方式

// Fetch the title and favicon of the page; the icon file is saved into the current directory.
$info = titleFavicon::getInfo('https://www.baidu.com', 1, './');
dump($info);
輸出結果:

array(3) {
  ["title"] => string(27) "百度一下,你就知道"
  ["url"] => string(33) "https://www.baidu.com/favicon.ico"
  ["path"] => string(19) "./www.baidu.com.ico"
}

相關文章