一、CSS選擇器
二、CSS選擇器例項
按照class屬性值取出網頁資訊
from scrapy import Selector
html="""
<html lang="en">
<head>
<meta charset="UTF-8">
<title>bobby基本資訊</title>
<script src="jquery-3.5.1.min.js"></script>
</head>
<body>
<div id="info">
<p style="color: blue">講師資訊</p>
<div class="teacher_info">
Python全棧工程師
<p class="age">年齡:29</p>
<p class="name bobbyname" data-bind="bobby">姓名:bobby</p>
<p class="work_years">工作年限:7年</p>
<p class="position">職位:python開發工程師</p>
</div>
<p style="color:aquamarine">課程資訊</p>
<table class="courses">
<tbody><tr><th>課程名稱</th>
<th>講師</th>
<th>地址</th>
</tr><tr>
<td>django打造線上教育</td>
<td>bobby</td>
<td><a href="https://coding.imooc.com/class/78.html">訪問</a></td>
</tr><tr>
<td>python高階程式設計</td>
<td>bobby</td>
<td><a href="https://coding.imooc.com/class/200.html">訪問</a></td>
</tr><tr>
<td>scrapy分散式爬蟲</td>
<td>bobby</td>
<td><a href="https://coding.imooc.com/class/92.html">訪問</a></td>
</tr><tr>
<td>diango rest framework打造生鮮電商</td>
<td>bobby</td>
<td><a href="https://coding.imooc.com/class/131.html">訪問</a></td>
</tr><tr>
<td>tornado從入門到精通</td>
<td>bobby</td>
<td><a href="https://coding.imooc.com/class/290.html">訪問</a></td>
</tr></tbody></table>
</div>
</body>
</html>
"""
# Build a Selector over the HTML text so it can be queried with CSS.
sel = Selector(text=html)
# Class selector: every element whose class list contains "teacher_info",
# serialised back to HTML strings (one list entry per match).
teacher_info = sel.css(".teacher_info").getall()
print(teacher_info)
輸出結果:輸出class為teacher_info的所有html元素
按照id值取出html網頁元素
from scrapy import Selector
html="""
<html lang="en">
<head>
<meta charset="UTF-8">
<title>bobby基本資訊</title>
<script src="jquery-3.5.1.min.js"></script>
</head>
<body>
<div id="info">
<p style="color: blue">講師資訊</p>
<div class="teacher_info">
Python全棧工程師
<p class="age">年齡:29</p>
<p class="name bobbyname" data-bind="bobby">姓名:bobby</p>
<p class="work_years">工作年限:7年</p>
<p class="position">職位:python開發工程師</p>
</div>
<p style="color:aquamarine">課程資訊</p>
<table class="courses">
<tbody><tr><th>課程名稱</th>
<th>講師</th>
<th>地址</th>
</tr><tr>
<td>django打造線上教育</td>
<td>bobby</td>
<td><a href="https://coding.imooc.com/class/78.html">訪問</a></td>
</tr><tr>
<td>python高階程式設計</td>
<td>bobby</td>
<td><a href="https://coding.imooc.com/class/200.html">訪問</a></td>
</tr><tr>
<td>scrapy分散式爬蟲</td>
<td>bobby</td>
<td><a href="https://coding.imooc.com/class/92.html">訪問</a></td>
</tr><tr>
<td>diango rest framework打造生鮮電商</td>
<td>bobby</td>
<td><a href="https://coding.imooc.com/class/131.html">訪問</a></td>
</tr><tr>
<td>tornado從入門到精通</td>
<td>bobby</td>
<td><a href="https://coding.imooc.com/class/290.html">訪問</a></td>
</tr></tbody></table>
</div>
</body>
</html>
"""
# Build a Selector over the HTML text so it can be queried with CSS.
sel = Selector(text=html)
# Id selector: the element with id="info", serialised back to an HTML string
# (returned as a list with one entry per match).
info_tag = sel.css("#info").getall()
print(info_tag)
輸出結果:輸出id為info的html元素(含其所有子元素)
選取對應class屬性值下的對應元素
from scrapy import Selector
html="""
<html lang="en">
<head>
<meta charset="UTF-8">
<title>bobby基本資訊</title>
<script src="jquery-3.5.1.min.js"></script>
</head>
<body>
<div id="info">
<p style="color: blue">講師資訊</p>
<div class="teacher_info">
Python全棧工程師
<p class="age">年齡:29</p>
<p class="name bobbyname" data-bind="bobby">姓名:bobby</p>
<p class="work_years">工作年限:7年</p>
<p class="position">職位:python開發工程師</p>
</div>
<p style="color:aquamarine">課程資訊</p>
<table class="courses">
<tbody><tr><th>課程名稱</th>
<th>講師</th>
<th>地址</th>
</tr><tr>
<td>django打造線上教育</td>
<td>bobby</td>
<td><a href="https://coding.imooc.com/class/78.html">訪問</a></td>
</tr><tr>
<td>python高階程式設計</td>
<td>bobby</td>
<td><a href="https://coding.imooc.com/class/200.html">訪問</a></td>
</tr><tr>
<td>scrapy分散式爬蟲</td>
<td>bobby</td>
<td><a href="https://coding.imooc.com/class/92.html">訪問</a></td>
</tr><tr>
<td>diango rest framework打造生鮮電商</td>
<td>bobby</td>
<td><a href="https://coding.imooc.com/class/131.html">訪問</a></td>
</tr><tr>
<td>tornado從入門到精通</td>
<td>bobby</td>
<td><a href="https://coding.imooc.com/class/290.html">訪問</a></td>
</tr></tbody></table>
</div>
</body>
</html>
"""
# Build a Selector over the HTML text so it can be queried with CSS.
sel = Selector(text=html)
# Child combinator (>): <p> elements that are direct children of .teacher_info.
# .get() returns the first match serialised as HTML, or None when nothing
# matches, instead of raising IndexError like .extract()[0].
age_tag = sel.css(".teacher_info > p").get()
print(age_tag)
輸出結果:<p class="age">年齡:29</p>
選取輸出對應class屬性值下的對應元素的值
from scrapy import Selector
html="""
<html lang="en">
<head>
<meta charset="UTF-8">
<title>bobby基本資訊</title>
<script src="jquery-3.5.1.min.js"></script>
</head>
<body>
<div id="info">
<p style="color: blue">講師資訊</p>
<div class="teacher_info">
Python全棧工程師
<p class="age">年齡:29</p>
<p class="name bobbyname" data-bind="bobby">姓名:bobby</p>
<p class="work_years">工作年限:7年</p>
<p class="position">職位:python開發工程師</p>
</div>
<p style="color:aquamarine">課程資訊</p>
<table class="courses">
<tbody><tr><th>課程名稱</th>
<th>講師</th>
<th>地址</th>
</tr><tr>
<td>django打造線上教育</td>
<td>bobby</td>
<td><a href="https://coding.imooc.com/class/78.html">訪問</a></td>
</tr><tr>
<td>python高階程式設計</td>
<td>bobby</td>
<td><a href="https://coding.imooc.com/class/200.html">訪問</a></td>
</tr><tr>
<td>scrapy分散式爬蟲</td>
<td>bobby</td>
<td><a href="https://coding.imooc.com/class/92.html">訪問</a></td>
</tr><tr>
<td>diango rest framework打造生鮮電商</td>
<td>bobby</td>
<td><a href="https://coding.imooc.com/class/131.html">訪問</a></td>
</tr><tr>
<td>tornado從入門到精通</td>
<td>bobby</td>
<td><a href="https://coding.imooc.com/class/290.html">訪問</a></td>
</tr></tbody></table>
</div>
</body>
</html>
"""
# Build a Selector over the HTML text so it can be queried with CSS.
sel = Selector(text=html)
# ::text selects the text nodes of the matched <p> elements rather than the
# elements themselves. .get() returns the first one, or None when nothing
# matches, instead of raising IndexError like .extract()[0].
age_tag_value = sel.css(".teacher_info > p::text").get()
print(age_tag_value)
輸出結果:年齡:29
輸出對應class屬性的指定第n個孩子節點的值
from scrapy import Selector
html="""
<html lang="en">
<head>
<meta charset="UTF-8">
<title>bobby基本資訊</title>
<script src="jquery-3.5.1.min.js"></script>
</head>
<body>
<div id="info">
<p style="color: blue">講師資訊</p>
<div class="teacher_info">
Python全棧工程師
<p class="age">年齡:29</p>
<p class="name bobbyname" data-bind="bobby">姓名:bobby</p>
<p class="work_years">工作年限:7年</p>
<p class="position">職位:python開發工程師</p>
</div>
<p style="color:aquamarine">課程資訊</p>
<table class="courses">
<tbody><tr><th>課程名稱</th>
<th>講師</th>
<th>地址</th>
</tr><tr>
<td>django打造線上教育</td>
<td>bobby</td>
<td><a href="https://coding.imooc.com/class/78.html">訪問</a></td>
</tr><tr>
<td>python高階程式設計</td>
<td>bobby</td>
<td><a href="https://coding.imooc.com/class/200.html">訪問</a></td>
</tr><tr>
<td>scrapy分散式爬蟲</td>
<td>bobby</td>
<td><a href="https://coding.imooc.com/class/92.html">訪問</a></td>
</tr><tr>
<td>diango rest framework打造生鮮電商</td>
<td>bobby</td>
<td><a href="https://coding.imooc.com/class/131.html">訪問</a></td>
</tr><tr>
<td>tornado從入門到精通</td>
<td>bobby</td>
<td><a href="https://coding.imooc.com/class/290.html">訪問</a></td>
</tr></tbody></table>
</div>
</body>
</html>
"""
# Build a Selector over the HTML text so it can be queried with CSS.
sel = Selector(text=html)
# The standard pseudo-class is spelled ":nth-child" (hyphen); the underscore
# spelling is not valid CSS and should not be relied on.
# p:nth-child(2) matches a <p> that is the second element child of its parent.
# The bare text at the start of .teacher_info is not an element, so the second
# element child here is the "name" paragraph.
name_tag = ".teacher_info p:nth-child(2)::text"
# Child combinator (>): only <p> elements directly under .teacher_info.
# .get() yields the first match, or None when nothing matches, instead of
# raising IndexError like .extract()[0].
name_tag_value1 = sel.css(".teacher_info > p:nth-child(2)::text").get()
print(name_tag_value1)
# Descendant combinator (space): <p> elements at any depth below .teacher_info.
name_tag_value2 = sel.css(".teacher_info p:nth-child(2)::text").get()
print(name_tag_value2)
# The same descendant query, supplied through a variable.
name_tag_value3 = sel.css(name_tag).get()
print(name_tag_value3)
輸出結果:三次皆輸出「姓名:bobby」
輸出緊接在指定class元素之後的p元素的文字(+ 為相鄰兄弟選擇器)
from scrapy import Selector
html="""
<html lang="en">
<head>
<meta charset="UTF-8">
<title>bobby基本資訊</title>
<script src="jquery-3.5.1.min.js"></script>
</head>
<body>
<div id="info">
<p style="color: blue">講師資訊</p>
<div class="teacher_info">
Python全棧工程師
<p class="age">年齡:29</p>
<p class="name bobbyname" data-bind="bobby">姓名:bobby</p>
<p class="work_years">工作年限:7年</p>
<p class="position">職位:python開發工程師</p>
</div>
<p style="color:aquamarine">課程資訊</p>
<table class="courses">
<tbody><tr><th>課程名稱</th>
<th>講師</th>
<th>地址</th>
</tr><tr>
<td>django打造線上教育</td>
<td>bobby</td>
<td><a href="https://coding.imooc.com/class/78.html">訪問</a></td>
</tr><tr>
<td>python高階程式設計</td>
<td>bobby</td>
<td><a href="https://coding.imooc.com/class/200.html">訪問</a></td>
</tr><tr>
<td>scrapy分散式爬蟲</td>
<td>bobby</td>
<td><a href="https://coding.imooc.com/class/92.html">訪問</a></td>
</tr><tr>
<td>diango rest framework打造生鮮電商</td>
<td>bobby</td>
<td><a href="https://coding.imooc.com/class/131.html">訪問</a></td>
</tr><tr>
<td>tornado從入門到精通</td>
<td>bobby</td>
<td><a href="https://coding.imooc.com/class/290.html">訪問</a></td>
</tr>
</tbody>
</table>
</div>
</body>
</html>
"""
# Build a Selector over the HTML text so it can be queried with CSS.
sel = Selector(text=html)
# Adjacent sibling combinator (+): the <p> that immediately follows
# .teacher_info under the same parent. .get() returns its text, or None when
# nothing matches, instead of raising IndexError like .extract()[0].
course_info1 = sel.css(".teacher_info + p::text").get()
print(course_info1)
輸出結果:課程資訊
輸出指定class元素之後所有同級p元素中第一個的文字(~ 為一般兄弟選擇器)
from scrapy import Selector
html="""
<html lang="en">
<head>
<meta charset="UTF-8">
<title>bobby基本資訊</title>
<script src="jquery-3.5.1.min.js"></script>
</head>
<body>
<div id="info">
<p style="color: blue">講師資訊</p>
<div class="teacher_info">
Python全棧工程師
<p class="age">年齡:29</p>
<p class="name bobbyname" data-bind="bobby">姓名:bobby</p>
<p class="work_years">工作年限:7年</p>
<p class="position">職位:python開發工程師</p>
</div>
<p style="color:aquamarine">課程資訊</p>
<table class="courses">
<tbody><tr><th>課程名稱</th>
<th>講師</th>
<th>地址</th>
</tr><tr>
<td>django打造線上教育</td>
<td>bobby</td>
<td><a href="https://coding.imooc.com/class/78.html">訪問</a></td>
</tr><tr>
<td>python高階程式設計</td>
<td>bobby</td>
<td><a href="https://coding.imooc.com/class/200.html">訪問</a></td>
</tr><tr>
<td>scrapy分散式爬蟲</td>
<td>bobby</td>
<td><a href="https://coding.imooc.com/class/92.html">訪問</a></td>
</tr><tr>
<td>diango rest framework打造生鮮電商</td>
<td>bobby</td>
<td><a href="https://coding.imooc.com/class/131.html">訪問</a></td>
</tr><tr>
<td>tornado從入門到精通</td>
<td>bobby</td>
<td><a href="https://coding.imooc.com/class/290.html">訪問</a></td>
</tr></tbody></table>
</div>
</body>
</html>
"""
# Build a Selector over the HTML text so it can be queried with CSS.
sel = Selector(text=html)
# General sibling combinator (~): <p> elements that follow .teacher_info under
# the same parent, adjacent or not. .get() returns the first match's text, or
# None when nothing matches, instead of raising IndexError like .extract()[0].
course_info2 = sel.css(".teacher_info ~ p::text").get()
print(course_info2)
輸出結果:課程資訊
依href屬性值精確匹配超連結,輸出其文字內容
from scrapy import Selector
html="""
<html lang="en">
<head>
<meta charset="UTF-8">
<title>bobby基本資訊</title>
<script src="jquery-3.5.1.min.js"></script>
</head>
<body>
<div id="info">
<p style="color: blue">講師資訊</p>
<div class="teacher_info">
Python全棧工程師
<p class="age">年齡:29</p>
<p class="name bobbyname" data-bind="bobby">姓名:bobby</p>
<p class="work_years">工作年限:7年</p>
<p class="position">職位:python開發工程師</p>
</div>
<p style="color:aquamarine">課程資訊</p>
<table class="courses">
<tbody><tr><th>課程名稱</th>
<th>講師</th>
<th>地址</th>
</tr><tr>
<td>django打造線上教育</td>
<td>bobby</td>
<td><a href="https://coding.imooc.com/class/78.html">訪問</a></td>
</tr><tr>
<td>python高階程式設計</td>
<td>bobby</td>
<td><a href="https://coding.imooc.com/class/200.html">訪問</a></td>
</tr><tr>
<td>scrapy分散式爬蟲</td>
<td>bobby</td>
<td><a href="https://coding.imooc.com/class/92.html">訪問</a></td>
</tr><tr>
<td>diango rest framework打造生鮮電商</td>
<td>bobby</td>
<td><a href="https://coding.imooc.com/class/131.html">訪問</a></td>
</tr><tr>
<td>tornado從入門到精通</td>
<td>bobby</td>
<td><a href="https://coding.imooc.com/class/290.html">訪問</a></td>
</tr></tbody></table>
</div>
</body>
</html>
"""
# Build a Selector over the HTML text so it can be queried with CSS.
sel = Selector(text=html)
# Attribute selector [href='...']: the <a> whose href equals the given URL
# exactly; ::text then selects the link text, not the href itself.
# .get() returns the first match, or None when nothing matches, instead of
# raising IndexError like .extract()[0].
# The variable keeps its original spelling so any later reference still resolves.
couse_url1 = sel.css("a[href='https://coding.imooc.com/class/200.html']::text").get()
print(couse_url1)
輸出結果:訪問
獲取href屬性值包含某字串的所有超連結的文字內容
from scrapy import Selector
html="""
<html lang="en">
<head>
<meta charset="UTF-8">
<title>bobby基本資訊</title>
<script src="jquery-3.5.1.min.js"></script>
</head>
<body>
<div id="info">
<p style="color: blue">講師資訊</p>
<div class="teacher_info">
Python全棧工程師
<p class="age">年齡:29</p>
<p class="name bobbyname" data-bind="bobby">姓名:bobby</p>
<p class="work_years">工作年限:7年</p>
<p class="position">職位:python開發工程師</p>
</div>
<p style="color:aquamarine">課程資訊</p>
<table class="courses">
<tbody><tr><th>課程名稱</th>
<th>講師</th>
<th>地址</th>
</tr><tr>
<td>django打造線上教育</td>
<td>bobby</td>
<td><a href="https://coding.imooc.com/class/78.html">訪問</a></td>
</tr><tr>
<td>python高階程式設計</td>
<td>bobby</td>
<td><a href="https://coding.imooc.com/class/200.html">訪問</a></td>
</tr><tr>
<td>scrapy分散式爬蟲</td>
<td>bobby</td>
<td><a href="https://coding.imooc.com/class/92.html">訪問</a></td>
</tr><tr>
<td>diango rest framework打造生鮮電商</td>
<td>bobby</td>
<td><a href="https://coding.imooc.com/class/131.html">訪問</a></td>
</tr><tr>
<td>tornado從入門到精通</td>
<td>bobby</td>
<td><a href="https://coding.imooc.com/class/290.html">訪問</a></td>
</tr></tbody></table>
</div>
</body>
</html>
"""
# Build a Selector over the HTML text so it can be queried with CSS.
sel = Selector(text=html)
# Substring attribute selector [href*='imooc']: every <a> whose href contains
# "imooc"; ::text collects the link text of each match into a list.
course_url2 = sel.css("a[href*='imooc']::text").getall()
print(course_url2)
輸出結果:['訪問', '訪問', '訪問', '訪問', '訪問']
向後獲取所有同級p元素的文字內容
from scrapy import Selector
html="""
<html lang="en">
<head>
<meta charset="UTF-8">
<title>bobby基本資訊</title>
<script src="jquery-3.5.1.min.js"></script>
</head>
<body>
<div id="info">
<p style="color: blue">講師資訊</p>
<div class="teacher_info">
Python全棧工程師
<p class="age">年齡:29</p>
<p class="name bobbyname" data-bind="bobby">姓名:bobby</p>
<p class="work_years">工作年限:7年</p>
<p class="position">職位:python開發工程師</p>
</div>
<p style="color:aquamarine">課程資訊</p>
<table class="courses">
<tbody><tr><th>課程名稱</th>
<th>講師</th>
<th>地址</th>
</tr><tr>
<td>django打造線上教育</td>
<td>bobby</td>
<td><a href="https://coding.imooc.com/class/78.html">訪問</a></td>
</tr><tr>
<td>python高階程式設計</td>
<td>bobby</td>
<td><a href="https://coding.imooc.com/class/200.html">訪問</a></td>
</tr><tr>
<td>scrapy分散式爬蟲</td>
<td>bobby</td>
<td><a href="https://coding.imooc.com/class/92.html">訪問</a></td>
</tr><tr>
<td>diango rest framework打造生鮮電商</td>
<td>bobby</td>
<td><a href="https://coding.imooc.com/class/131.html">訪問</a></td>
</tr><tr>
<td>tornado從入門到精通</td>
<td>bobby</td>
<td><a href="https://coding.imooc.com/class/290.html">訪問</a></td>
</tr></tbody></table>
</div>
</body>
</html>
"""
# Build a Selector over the HTML text so it can be queried with CSS.
sel = Selector(text=html)
# General sibling combinator (~): every <p> that comes after p.name under the
# same parent; ::text collects the text of each match into a list.
sibling_tag = sel.css("p.name ~ p::text").getall()
print(sibling_tag)
輸出結果:['工作年限:7年', '職位:python開發工程師']