# 例題 (Example)
import lxml.html
# Sample markup used as the input for the XPath examples in this script; it is
# parsed with lxml.html.fromstring(test_data).
# It contains a <ul> of <li><a> links (some carrying an id attribute), three
# <book> entries directly under the <div> (with optional lang / id
# attributes), and one extra <book> nested in a top-level <a> outside the <div>.
test_data = """
<div>
<ul>
<li class="item-0"><a href="link1.html" id="places_neighbours__row">9,596,960first item</a></li>
<li class="item-1"><a href="link2.html">second item</a></li>
<li class="item-inactive"><a href="link3.html">third item</a></li>
<li class="item-1"><a href="link4.html" id="places_neighbours__row">fourth item</a></li>
<li class="item-0"><a href="link5.html">fifth item</a></li>
<li class="good-0"><a href="link5.html">fifth item</a></li>
</ul>
<book>
<title lang="aaengbb">111111</title>
<price id="places_neighbours__row">29.99</price>
</book>
<book>
<title lang="zh">222222</title>
<price>39.95</price>
</book>
<book>
<title>33333</title>
<price>40</price>
</book>
</div>
<a>
<book>
<title>123</title>
</book>
</a>
"""
"""
/ 從根標籤開始 必須具有嚴格的父子關係
// 從當前標籤 後續節點含有即可選出
* 萬用字元,選擇所有
//div/book[1]/title 選擇div下第一個book標籤的title元素
//div/book/title[@lang="zh"] 選擇lang屬性值為zh的title元素
//div/book/title //book/title //title //div//title 具有相同的結果,因為使用相對路徑最終都指向title
//book/title/@* 將title所有的屬性值選擇出來
//book/title/text() 將title的內容選擇出來,使用內建text()函式
//a[@href="link1.html" and @id="places_neighbours__row"]
//a[@href="link1.html" or @id="places_neighbours__row"]
//div/book[last()]/title/text() 將最後一個book元素的title文字內容選出
//div/book[price > 39]/title 將子標籤price數值大於39的book的title元素選擇出來
//li[starts-with(@class,"item")] 將class屬性字首是item的li標籤選出
//title[contains(@lang,"eng")] 將lang屬性值含有eng關鍵字的title標籤選出
"""
# Parse the sample markup so that XPath queries can be run against it.
html = lxml.html.fromstring(test_data)

# Alternative queries to experiment with: uncomment exactly one assignment
# at a time. String delimiters are regular quotes (the backticks previously
# used here are not valid Python 3 syntax).
# html_data = html.xpath('//div/book/title/text()')
# html_data = html.xpath('//div/book[1]/title/text()')
# html_data = html.xpath('//div/book/title[@lang="zh"]/text()')
# html_data = html.xpath('//div/book/title/text()')
# html_data = html.xpath('//book/title/text()')
# html_data = html.xpath('//title/text()')
# html_data = html.xpath('//div//title/text()')
# html_data = html.xpath('//book/title/@*')
# html_data = html.xpath('//a[@href="link1.html" and @id="places_neighbours__row"]/text()')
# html_data = html.xpath('//a[@href="link2.html"]/text()')
# html_data = html.xpath('//div/ul/li/a[@id]/text()')
# html_data = html.xpath('//a[@href="link1.html" and @id="places_neighbours__row"]/@*')
# html_data = html.xpath('//a[@href="link1.html" and @id="places_neighbours__row"]/@href')
# html_data = html.xpath('//a[@href="link1.html" or @id="places_neighbours__row"]/text()')
# html_data = html.xpath('//div/book[last()]/title/text()')
# html_data = html.xpath('//div/book[price > 39]/title/text()')
# html_data = html.xpath('//li[starts-with(@class,"item")]/a/text()')

# Active example: text of every <title> whose lang attribute contains "eng".
html_data = html.xpath('//title[contains(@lang,"eng")]/text()')

# Print each matched result on its own line.
for i in html_data:
    print(i)