C# 讀取Word文字框中的文字、圖片和表格(附VB.NET程式碼)

iceblue發表於2021-03-10

【概述】

Word中可插入文字框,在文字框中可新增文字、圖片、表格等內容。本文透過C#程式碼介紹如何讀取文字框中的文字、圖片和表格等內容,並附VB.NET程式碼,有需要可作參考。

【程式環境】

程式需要引用程式集檔案Spire.Doc.dll,以及其他相關dll檔案(見下文)。

用於測試的Word源文件如圖:

 

【程式碼】

1.讀取文字框中的文字

所需程式集:

【C#】

using Spire.Doc;
using Spire.Doc.Documents;
using Spire.Doc.Fields;
using System;
using System.IO;
using System.Text;

namespace ExtractText
{
    /// <summary>
    /// Sample program that extracts the text of the first text box in a Word document.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Loads "test.docx", collects the text of every paragraph that is a direct child
        /// of the first text box, writes it to "ExtractedText.txt" and then opens that file.
        /// If the document has no text box, a message is written to standard error and
        /// nothing is saved.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // Load the source Word document
            Document doc = new Document();
            try
            {
                doc.LoadFromFile("test.docx");

                // Indexing an empty collection would throw, so check before taking item 0
                if (doc.TextBoxes.Count == 0)
                {
                    System.Console.Error.WriteLine("test.docx does not contain any text box.");
                    return;
                }

                // Get the first text box
                TextBox textbox = doc.TextBoxes[0];

                // Accumulates one line per paragraph
                StringBuilder sb = new StringBuilder();

                // Walk the text box's direct children and keep only the paragraphs
                foreach (object obj in textbox.Body.ChildObjects)
                {
                    Paragraph paragraph = obj as Paragraph;
                    if (paragraph != null)
                    {
                        sb.AppendLine(paragraph.Text);
                    }
                }

                // Save the collected text to a txt file
                File.WriteAllText("ExtractedText.txt", sb.ToString());
            }
            finally
            {
                // Release the document even when loading or extraction fails
                doc.Dispose();
            }

            // Open the result with its associated application. UseShellExecute is set
            // explicitly because it defaults to false on .NET Core/.NET 5+, where starting
            // a non-executable file would otherwise throw.
            System.Diagnostics.Process.Start(
                new System.Diagnostics.ProcessStartInfo("ExtractedText.txt") { UseShellExecute = true });
        }
    }
}

【VB.NET】

Imports Spire.Doc
Imports Spire.Doc.Documents
Imports Spire.Doc.Fields
Imports System.IO
Imports System.Text

Namespace ExtractText
    ''' <summary>
    ''' Sample program that extracts the text of the first text box in a Word document.
    ''' </summary>
    Class Program
        ''' <summary>
        ''' Loads "test.docx", collects the text of every paragraph that is a direct child
        ''' of the first text box, writes it to "ExtractedText.txt" and then opens that file.
        ''' If the document has no text box, a message is written to standard error and
        ''' nothing is saved.
        ''' </summary>
        ''' <param name="args">Command-line arguments (not used).</param>
        Private Shared Sub Main(args As String())
            ' Load the source Word document
            Dim doc As New Document()
            Try
                doc.LoadFromFile("test.docx")

                ' Indexing an empty collection would throw, so check before taking item 0
                If doc.TextBoxes.Count = 0 Then
                    System.Console.Error.WriteLine("test.docx does not contain any text box.")
                    Return
                End If

                ' Get the first text box
                Dim textbox As TextBox = doc.TextBoxes(0)

                ' Accumulates one line per paragraph
                Dim sb As New StringBuilder()

                ' Walk the text box's direct children and keep only the paragraphs
                For Each obj As Object In textbox.Body.ChildObjects
                    Dim paragraph As Paragraph = TryCast(obj, Paragraph)
                    If paragraph IsNot Nothing Then
                        sb.AppendLine(paragraph.Text)
                    End If
                Next

                ' Save the collected text to a txt file
                File.WriteAllText("ExtractedText.txt", sb.ToString())
            Finally
                ' Release the document even when loading or extraction fails
                doc.Dispose()
            End Try

            ' Open the result with its associated application. UseShellExecute is set
            ' explicitly because it defaults to False on .NET Core/.NET 5+, where starting
            ' a non-executable file would otherwise throw.
            System.Diagnostics.Process.Start(
                New System.Diagnostics.ProcessStartInfo("ExtractedText.txt") With {.UseShellExecute = True})
        End Sub
    End Class
End Namespace

文字讀取結果:

2.讀取文字框中的圖片

所需程式集:

【C#】

using Spire.Doc;
using Spire.Doc.Documents;
using Spire.Doc.Fields;
using System;

namespace ExtractImg
{
    /// <summary>
    /// Sample program that saves the pictures found in the first text box of a Word document.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Loads "test.docx", visits every child object of every paragraph in the first
        /// text box and saves each picture as "Image-N.png" (N counts up from 0).
        /// If the document has no text box, a message is written to standard error and
        /// nothing is saved.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // Load the source Word document
            Document doc = new Document();
            try
            {
                doc.LoadFromFile("test.docx");

                // Indexing an empty collection would throw, so check before taking item 0
                if (doc.TextBoxes.Count == 0)
                {
                    System.Console.Error.WriteLine("test.docx does not contain any text box.");
                    return;
                }

                // Get the first text box
                TextBox textbox = doc.TextBoxes[0];

                // Running counter used to give each saved picture a unique file name
                int index = 0;

                // Visit every paragraph in the text box
                foreach (Paragraph paragraph in textbox.Body.Paragraphs)
                {
                    // Visit every child object of the paragraph
                    foreach (object obj in paragraph.ChildObjects)
                    {
                        // Skip anything that is not a picture
                        DocPicture picture = obj as DocPicture;
                        if (picture == null)
                        {
                            continue;
                        }

                        // Save the picture as PNG
                        string imageName = string.Format("Image-{0}.png", index);
                        picture.Image.Save(imageName, System.Drawing.Imaging.ImageFormat.Png);
                        index++;
                    }
                }
            }
            finally
            {
                // Release the document even when loading or extraction fails
                doc.Dispose();
            }
        }
    }
}

【VB.NET】

Imports Spire.Doc
Imports Spire.Doc.Documents
Imports Spire.Doc.Fields

Namespace ExtractImg
    ''' <summary>
    ''' Sample program that saves the pictures found in the first text box of a Word document.
    ''' </summary>
    Class Program
        ''' <summary>
        ''' Loads "test.docx", visits every child object of every paragraph in the first
        ''' text box and saves each picture as "Image-N.png" (N counts up from 0).
        ''' If the document has no text box, a message is written to standard error and
        ''' nothing is saved.
        ''' </summary>
        ''' <param name="args">Command-line arguments (not used).</param>
        Private Shared Sub Main(args As String())
            ' Load the source Word document
            Dim doc As New Document()
            Try
                doc.LoadFromFile("test.docx")

                ' Indexing an empty collection would throw, so check before taking item 0
                If doc.TextBoxes.Count = 0 Then
                    System.Console.Error.WriteLine("test.docx does not contain any text box.")
                    Return
                End If

                ' Get the first text box
                Dim textbox As TextBox = doc.TextBoxes(0)

                ' Running counter used to give each saved picture a unique file name
                Dim index As Integer = 0

                ' Visit every paragraph in the text box
                For Each paragraph As Paragraph In textbox.Body.Paragraphs
                    ' Visit every child object of the paragraph
                    For Each obj As Object In paragraph.ChildObjects
                        ' Skip anything that is not a picture
                        Dim picture As DocPicture = TryCast(obj, DocPicture)
                        If picture Is Nothing Then
                            Continue For
                        End If

                        ' Save the picture as PNG
                        Dim imageName As String = String.Format("Image-{0}.png", index)
                        picture.Image.Save(imageName, System.Drawing.Imaging.ImageFormat.Png)
                        index += 1
                    Next
                Next
            Finally
                ' Release the document even when loading or extraction fails
                doc.Dispose()
            End Try
        End Sub
    End Class
End Namespace

圖片讀取結果:

3.讀取文字框中的表格

所需程式集:

【C#】

using Spire.Doc;
using Spire.Doc.Documents;
using Spire.Doc.Fields;
using System.IO;
using System.Text;

namespace ExtractTable
{
    /// <summary>
    /// Sample program that extracts the text of the first table inside the first text box
    /// of a Word document.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Loads "test.docx", takes the first table of the first text box and writes the
        /// text of every paragraph in every cell (row by row, one paragraph per line) to
        /// "ExtractedTable.txt". If the document has no text box, or the text box has no
        /// table, a message is written to standard error and nothing is saved.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            // Load the Word document
            Document doc = new Document();
            try
            {
                doc.LoadFromFile("test.docx");

                // Indexing an empty collection would throw, so check before taking item 0
                if (doc.TextBoxes.Count == 0)
                {
                    System.Console.Error.WriteLine("test.docx does not contain any text box.");
                    return;
                }

                // Get the first text box
                TextBox textbox = doc.TextBoxes[0];

                if (textbox.Body.Tables.Count == 0)
                {
                    System.Console.Error.WriteLine("The first text box does not contain any table.");
                    return;
                }

                // Get the first table in the text box; the 'as' cast yields null when the
                // item is not a Table, so that case is rejected explicitly
                Table table = textbox.Body.Tables[0] as Table;
                if (table == null)
                {
                    System.Console.Error.WriteLine("The first table in the text box could not be read as a Table.");
                    return;
                }

                StringBuilder sb = new StringBuilder();

                // Walk rows, then cells, then paragraphs, collecting the text
                foreach (TableRow row in table.Rows)
                {
                    foreach (TableCell cell in row.Cells)
                    {
                        foreach (Paragraph paragraph in cell.Paragraphs)
                        {
                            sb.AppendLine(paragraph.Text);
                        }
                    }
                }

                // Save the collected text to a txt file
                File.WriteAllText("ExtractedTable.txt", sb.ToString());
            }
            finally
            {
                // Release the document even when loading or extraction fails
                doc.Dispose();
            }
        }
    }
}

【VB.NET】

Imports Spire.Doc
Imports Spire.Doc.Documents
Imports Spire.Doc.Fields
Imports System.IO
Imports System.Text

Namespace ExtractTable
    ''' <summary>
    ''' Sample program that extracts the text of the first table inside the first text box
    ''' of a Word document.
    ''' </summary>
    Class Program
        ''' <summary>
        ''' Loads "test.docx", takes the first table of the first text box and writes the
        ''' text of every paragraph in every cell (row by row, one paragraph per line) to
        ''' "ExtractedTable.txt". If the document has no text box, or the text box has no
        ''' table, a message is written to standard error and nothing is saved.
        ''' </summary>
        ''' <param name="args">Command-line arguments (not used).</param>
        Private Shared Sub Main(args As String())
            ' Load the Word document
            Dim doc As New Document()
            Try
                doc.LoadFromFile("test.docx")

                ' Indexing an empty collection would throw, so check before taking item 0
                If doc.TextBoxes.Count = 0 Then
                    System.Console.Error.WriteLine("test.docx does not contain any text box.")
                    Return
                End If

                ' Get the first text box
                Dim textbox As TextBox = doc.TextBoxes(0)

                If textbox.Body.Tables.Count = 0 Then
                    System.Console.Error.WriteLine("The first text box does not contain any table.")
                    Return
                End If

                ' Get the first table in the text box; TryCast yields Nothing when the
                ' item is not a Table, so that case is rejected explicitly
                Dim table As Table = TryCast(textbox.Body.Tables(0), Table)
                If table Is Nothing Then
                    System.Console.Error.WriteLine("The first table in the text box could not be read as a Table.")
                    Return
                End If

                Dim sb As New StringBuilder()

                ' Walk rows, then cells, then paragraphs, collecting the text
                For Each row As TableRow In table.Rows
                    For Each cell As TableCell In row.Cells
                        For Each paragraph As Paragraph In cell.Paragraphs
                            sb.AppendLine(paragraph.Text)
                        Next
                    Next
                Next

                ' Save the collected text to a txt file
                File.WriteAllText("ExtractedTable.txt", sb.ToString())
            Finally
                ' Release the document even when loading or extraction fails
                doc.Dispose()
            End Try
        End Sub
    End Class
End Namespace

表格資料讀取結果:

 

【最後】

以上是本文關於透過C#程式讀取Word中文字框的方法。另推薦閱讀《Java 讀取Word文字框中的文字、圖片和表格》。

 

(本文完,如需轉載,請務必註明出處!!)

 

相關文章