import csv

import pypdf
# Input PDF, output PDF, and the CSV file that describes the outline.
input_pdf_filename = 'file.pdf'
output_pdf_filename = 'file_with_toc.pdf'
csv_filename = 'output.csv'

# Writer that receives the copied pages.
writer = pypdf.PdfWriter()

# Copy every page of the source PDF into the writer while the source
# stream is still open.
with open(input_pdf_filename, "rb") as input_pdf:
    reader = pypdf.PdfReader(input_pdf)
    writer.append_pages_from_reader(reader)
# Parse the table-of-contents CSV into outline entries.
bookmarks = []  # outline items created so far, in insertion order
toc_entries = []  # (level, page_number, title) tuples in CSV order

# newline='' lets the csv module handle line endings and quoted fields, so a
# title may contain a comma as long as it is quoted.
with open(csv_filename, 'r', encoding='GBK', newline='') as csv_file:
    csv_rows = csv.reader(csv_file)
    next(csv_rows, None)  # skip the header row; tolerate an empty file
    for row in csv_rows:
        # Blank lines (e.g. a trailing newline at the end of the file) carry
        # no entry and are skipped instead of failing the unpacking below.
        if not any(cell.strip() for cell in row):
            continue
        if len(row) != 3:
            raise ValueError(
                f"{csv_filename}, line {csv_rows.line_num}: expected "
                f"'hierarchy,title,page_number' but got {row!r}"
            )
        hierarchy, title, page_number = row
        page_number = int(page_number)
        # Nesting depth is the number of dot-separated parts of the
        # hierarchy field: "2" -> 1, "2.3" -> 2, "2.3.1" -> 3.
        level = hierarchy.count('.') + 1
        toc_entries.append((level, page_number, title))
# Add the outline items in CSV order, nesting each one under the most
# recently added item of the nearest shallower level.
latest_by_level = {}  # level -> most recent outline item still open at that level
for level, page_number, title in toc_entries:
    # Walk upwards from the level just above this one; the first level that
    # has an open item supplies the parent. None makes it a top-level item.
    parent_bookmark = next(
        (
            latest_by_level[depth]
            for depth in range(level - 1, 0, -1)
            if depth in latest_by_level
        ),
        None,
    )
    # NOTE(review): page_number is passed through unchanged. pypdf documents
    # this argument as a 0-based page index -- confirm whether the CSV holds
    # 0-based indices or 1-based printed page numbers.
    bookmark = writer.add_outline_item(
        title=title, page_number=page_number, parent=parent_bookmark
    )
    # Items at this level or deeper belong to the branch that just ended;
    # forget them so later entries cannot attach to a stale ancestor.
    for depth in [d for d in latest_by_level if d >= level]:
        del latest_by_level[depth]
    latest_by_level[level] = bookmark
    bookmarks.append(bookmark)  # keep the full list of created items
# Write the pages and outline held by the writer to the output file.
with open(output_pdf_filename, "wb") as output_pdf:
    writer.write(output_pdf)

# Close the writer once the output has been written.
writer.close()