Python自動化 | 利用python-docx 實現word操作

Xylon_發表於2020-09-26

一、安裝python-docx模組

pip install python-docx

二、讀取word檔案

1.word文件結構

Document: 文件
Paragraph:段落
Run:文字塊

2.提取文字
2.1提取段落例項,段數:

.paragraphs  獲取一個列表,包含每個段落的例項
from docx import Document

# Open the existing document and inspect its paragraphs.
doc = Document("0.docx")
paragraphs = doc.paragraphs  # list holding one Paragraph instance per paragraph
print(paragraphs)
print(len(paragraphs))  # number of paragraphs

結果:

[<docx.text.paragraph.Paragraph object at 0x000001F88E2F2E80>, <docx.text.paragraph.Paragraph object at 0x000001F88E2F2C88>, <docx.text.paragraph.Paragraph object at 0x000001F88E2F2EF0>]
3

結果說明有三段
2.2提取段落內容

from docx import Document

doc = Document("0.docx")
# Print the text of each paragraph on its own line.
for text in (paragraph.text for paragraph in doc.paragraphs):
    print(text)
以上便是word與python結合的第二部分內容,後續將會持續更新excel,ppt,爬蟲,人工智慧等相關內容,敬請關注

 

2.3獲取文字塊run

一個格式為一個文字塊run 上述句子有7個文字塊run

from docx import Document

doc = Document("0.docx")
paragraph = doc.paragraphs[1]  # the second paragraph (index 1)
# paragraph.runs is a list with one Run instance per text block.
print(paragraph.runs)
[<docx.text.run.Run object at 0x000001F88E2F2E10>, <docx.text.run.Run object at 0x000001F88E2F2C88>, <docx.text.run.Run object at 0x000001F88E2F2E80>, <docx.text.run.Run object at 0x000001F88E2F2DD8>, <docx.text.run.Run object at 0x000001F88E2F2EB8>, <docx.text.run.Run object at 0x000001F88E2F2F28>, <docx.text.run.Run object at 0x000001F88E2F2F60>]

paragraph.runs 獲取一個列表,得到每個文字塊的例項

2.4提取文字塊的內容

from docx import Document

doc = Document("0.docx")
runs = doc.paragraphs[1].runs  # runs of the second paragraph
print(runs)
# Print the text carried by each run, one run per line.
for text_block in runs:
    print(text_block.text)
excel與python結合的第二部分內容,
後續將會持續更新excel
,
ppt
,
爬蟲
,人工智慧

三、寫入word檔案

1.新增文字內容
1.1新增標題

方法:doc.add_heading("標題內容",level=標題等級)
from docx import Document

# Start from a new document and add a level-1 heading.
doc = Document()
doc.add_heading(text="新增一個一級標題", level=1)

1.2新增段落

方法:doc.add_paragraph("段落文字內容")
from docx import Document

doc = Document()
doc.add_heading("新增一個一級標題", level=1)
# Append two paragraphs, in order, keeping a handle on each.
paragraph1, paragraph2 = [
    doc.add_paragraph(text) for text in ("新增段落1", "新增段落2")
]

1.3新增文字塊

方法:add_run("文字內容").bold = True (一些可設定的引數)
from docx import Document

doc = Document()
doc.add_heading("新增一個一級標題", level=1)
paragraph1 = doc.add_paragraph("新增段落1")
paragraph2 = doc.add_paragraph("新增段落2")

# An empty paragraph built up from three runs: bold, plain, italic.
paragraph3 = doc.add_paragraph()
bold_run = paragraph3.add_run("粗體")
bold_run.bold = True
paragraph3.add_run('正常')
italic_run = paragraph3.add_run('斜體')
italic_run.italic = True

1.4新增分頁

方法:doc.add_page_break()
from docx import Document

# New document
doc = Document()

# Heading
doc.add_heading("新增一個一級標題", level=1)

# Paragraphs
paragraph1 = doc.add_paragraph("新增段落1")
paragraph2 = doc.add_paragraph("新增段落2")
paragraph3 = doc.add_paragraph()

# Runs (text blocks) inside the third paragraph
run_bold = paragraph3.add_run("粗體")
run_bold.bold = True
paragraph3.add_run('正常')
run_italic = paragraph3.add_run('斜體')
run_italic.italic = True

# Page break at the end of the document
doc.add_page_break()

2.新增圖片and表格
新增圖片:

方法:
doc.add_picture("圖片地址",width=Cm(設定的寬度))
doc.add_picture("圖片地址",height=Cm(設定的高度))
只需要給定一個高度或者寬度,另一個尺寸會根據比例自動計算
from docx import Document
from docx.shared import Cm

# New document
doc = Document()

# Heading
doc.add_heading("新增一個一級標題", level=1)

# Paragraphs
paragraph1 = doc.add_paragraph("新增段落1")
paragraph2 = doc.add_paragraph("新增段落2")
paragraph3 = doc.add_paragraph()

# Runs (text blocks)
emphasized = paragraph3.add_run("粗體")
emphasized.bold = True
paragraph3.add_run('正常')
slanted = paragraph3.add_run('斜體')
slanted.italic = True

# Page break
doc.add_page_break()

# Insert the same image twice: once sized by width, once by height.
for size in ({"width": Cm(2)}, {"height": Cm(3)}):
    doc.add_picture("00.png", **size)

新增表格:

方法:doc.add_table(rows=行數,cols=列數)  
from docx import Document
from docx.shared import Cm

doc = Document()  # new document
doc.add_heading("新增一個一級標題", level=1)  # heading
paragraph1 = doc.add_paragraph("新增段落1")  # paragraphs
paragraph2 = doc.add_paragraph("新增段落2")
paragraph3 = doc.add_paragraph()
paragraph3.add_run("粗體").bold = True  # runs (text blocks)
paragraph3.add_run('正常')
paragraph3.add_run('斜體').italic = True
doc.add_page_break()  # page break
doc.add_picture("00.png", width=Cm(2))
doc.add_picture("00.png", height=Cm(3))

# Table data: a header row followed by one row per student.
tabs = [
    ["姓名", '學號', "成績"],
    ['李華', 101, 93],
    ['小明', 102, 94],
    ['小麗', 103, 98],
    ['小張', 104, 100],
]
# Size the table from the data so that no row is dropped
# (a hard-coded rows=4 would omit the last of the five rows).
table = doc.add_table(rows=len(tabs), cols=len(tabs[0]))
for table_row, values in zip(table.rows, tabs):
    for cell, value in zip(table_row.cells, values):
        cell.text = str(value)  # cell text must be a string

3.儲存

doc.save(檔案路徑)

完整程式碼:

from docx import Document
from docx.shared import Cm
doc = Document()  # new document
doc.add_heading("新增一個一級標題", level=1)  # heading
paragraph1 = doc.add_paragraph("新增段落1")  # paragraphs
paragraph2 = doc.add_paragraph("新增段落2")
paragraph3 = doc.add_paragraph()
paragraph3.add_run("粗體").bold = True  # runs (text blocks)
paragraph3.add_run('正常')
paragraph3.add_run('斜體').italic = True
doc.add_page_break()  # page break
doc.add_picture("00.png", width=Cm(2))
doc.add_picture("00.png", height=Cm(3))

# Table data: a header row followed by one row per student.
tabs = [
    ["姓名", '學號', "成績"],
    ['李華', 101, 93],
    ['小明', 102, 94],
    ['小麗', 103, 98],
    ['小張', 104, 100],
]
# Size the table from the data so that no row is dropped
# (a hard-coded rows=4 would omit the last of the five rows).
table = doc.add_table(rows=len(tabs), cols=len(tabs[0]))
for table_row, values in zip(table.rows, tabs):
    for cell, value in zip(table_row.cells, values):
        cell.text = str(value)  # cell text must be a string
doc.save('10.docx')

結果:
在這裡插入圖片描述
在這裡插入圖片描述

 

四、調整Word文件樣式

1.修改文字樣式

from docx import Document
from docx.shared import Pt,RGBColor
from docx.oxml.ns import qn

doc = Document("0.docx")
for paragraph in doc.paragraphs:
    for run in paragraph.runs:
        font = run.font
        font.bold = True  # bold
        font.italic = True  # italic
        font.underline = True  # underline
        font.strike = True  # strikethrough
        font.shadow = True  # shadow
        font.size = Pt(15)  # font size
        font.color.rgb = RGBColor(255, 255, 0)  # colour
        font.name = "微軟雅黑"  # typeface
        # The East Asian (Chinese) typeface is set separately through the
        # w:eastAsia attribute of the run's rFonts element.
        east_asian_fonts = run._element.rPr.rFonts
        east_asian_fonts.set(qn('w:eastAsia'), "微軟雅黑")

doc.save("xin0.docx")

2.修改段落格式
2.1對齊樣式:

from docx.enum.text import WD_ALIGN_PARAGRAPH


# Centre-align the paragraph.
paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER

也可選擇其他不同對齊
如:LEFT,RIGHT等
2.2行間距

# A float value is interpreted as a multiple of the line height (2.0 = double spacing).
paragraph.paragraph_format.line_spacing = 2.0
用浮點數,表示兩倍行間距

2.3段前和段後間距

# Spacing before the paragraph: 12 points.
paragraph.paragraph_format.space_before = Pt(12)
# Spacing after the paragraph: 21 points.
paragraph.paragraph_format.space_after = Pt(21)
Pt(21)表示21磅

3.整體程式碼

from docx import Document
from docx.shared import Pt,RGBColor
from docx.oxml.ns import qn
from docx.enum.text import WD_ALIGN_PARAGRAPH



doc = Document("0.docx")
for paragraph in doc.paragraphs:
    # Paragraph-level formatting is applied once per paragraph, outside the
    # run loop, so paragraphs without any runs are formatted as well.
    paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER  # centre alignment
    paragraph_format = paragraph.paragraph_format
    paragraph_format.line_spacing = 2.0  # float: double line spacing
    paragraph_format.space_before = Pt(12)  # 12 points before the paragraph
    paragraph_format.space_after = Pt(21)  # 21 points after the paragraph

    # Run-level (character) formatting.
    for run in paragraph.runs:
        run.font.bold = True  # bold
        run.font.italic = True  # italic
        run.font.underline = True  # underline
        run.font.strike = True  # strikethrough
        run.font.shadow = True  # shadow
        run.font.size = Pt(15)  # font size
        run.font.color.rgb = RGBColor(255, 255, 0)  # colour
        run.font.name = "微軟雅黑"  # typeface
        # The East Asian (Chinese) typeface is set separately through the
        # w:eastAsia attribute of the run's rFonts element.
        r = run._element.rPr.rFonts
        r.set(qn('w:eastAsia'), "微軟雅黑")

doc.save("xin1.docx")

結果:
在這裡插入圖片描述

相關文章