生成目錄-無層級

redufa發表於2024-11-03

image


import pypdf

import csv  # stdlib; imported in this block so the script section is self-contained

# Input and output file names.
input_pdf_filename = 'file.pdf'
output_pdf_filename = 'file_with_toc.pdf'
csv_filename = 'output.csv'


def parse_toc_rows(lines):
    """Parse table-of-contents rows from CSV text lines.

    The first row is treated as a header and ignored. Every following
    non-blank row must have exactly three fields:
    ``hierarchy, title, page_number``. The nesting level is derived from the
    number of dots in ``hierarchy`` (``"1"`` -> 1, ``"1.2"`` -> 2, ...).

    Args:
        lines: Any iterable of text lines, e.g. an open text file or a list
            of strings.

    Returns:
        A list of ``(level, page_number, title)`` tuples in input order.

    Raises:
        ValueError: If a row does not have exactly three fields or its page
            number is not an integer.
    """
    reader = csv.reader(lines)
    next(reader, None)  # skip the header row; tolerate completely empty input

    entries = []
    for row in reader:
        # Blank lines (common at the end of exported CSV files) are skipped
        # instead of crashing the tuple unpacking below.
        if not any(field.strip() for field in row):
            continue
        if len(row) != 3:
            raise ValueError(
                f"CSV line {reader.line_num}: expected 3 fields "
                f"(hierarchy, title, page), got {len(row)}"
            )
        hierarchy, title, page_text = row
        try:
            page_number = int(page_text)
        except ValueError as err:
            raise ValueError(
                f"CSV line {reader.line_num}: invalid page number {page_text!r}"
            ) from err
        level = hierarchy.count('.') + 1
        entries.append((level, page_number, title))
    return entries


def add_outline_items(writer, toc_entries):
    """Add one outline item (bookmark) per entry, nesting by level.

    Each entry becomes a child of the most recently added entry that has a
    smaller level; entries with no such predecessor are added at top level.

    Args:
        writer: Object exposing ``add_outline_item(title=..., page_number=...,
            parent=...)`` and returning a handle usable as a later ``parent``.
        toc_entries: Iterable of ``(level, page_number, title)`` tuples in
            document order.

    Returns:
        The list of handles returned by ``writer.add_outline_item``, in the
        order the entries were added.
    """
    # NOTE(review): pypdf appears to treat page_number as a zero-based page
    # index - confirm that the CSV page numbers follow the same convention.
    open_ancestors = []  # stack of (level, handle) for the current branch
    added = []
    for level, page_number, title in toc_entries:
        # Close every branch that is at the same depth or deeper: those
        # entries are siblings or belong to an earlier subtree, so they can
        # never be the parent of this entry.
        while open_ancestors and open_ancestors[-1][0] >= level:
            open_ancestors.pop()
        parent = open_ancestors[-1][1] if open_ancestors else None

        handle = writer.add_outline_item(
            title=title, page_number=page_number, parent=parent
        )
        open_ancestors.append((level, handle))
        added.append(handle)
    return added


def main():
    """Copy the input PDF, add bookmarks listed in the CSV, write the result."""
    writer = pypdf.PdfWriter()
    try:
        # Copy all pages of the source PDF into the writer.
        with open(input_pdf_filename, "rb") as input_pdf:
            reader = pypdf.PdfReader(input_pdf)
            writer.append_pages_from_reader(reader)

        # newline='' lets the csv module handle line endings and embedded
        # newlines inside quoted fields itself.
        with open(csv_filename, 'r', encoding='GBK', newline='') as csv_file:
            toc_entries = parse_toc_rows(csv_file)

        # Add the bookmarks in CSV order with the correct nesting.
        add_outline_items(writer, toc_entries)

        # Write the new PDF file.
        with open(output_pdf_filename, "wb") as output_pdf:
            writer.write(output_pdf)
    finally:
        # Release the writer even if one of the steps above failed.
        writer.close()


if __name__ == "__main__":
    main()

相關文章