【Java】Word題庫解析2

emdzz發表於2024-09-08

初稿見:https://www.cnblogs.com/mindzone/p/18362194

一、新增需求

在原稿題庫之後,還需要生成一份純題目 + 純答案

答案放在開頭,題目裡面去掉答案

在檢查題型時還發現部分內容略有區別:

所以在判斷是否為答案的時候,需要同時相容「答案:」與「參考答案:」這兩種字首

二、關於老版本支援

舊版 .doc 格式(Word 97–2003,含 Word 2000)的文件,需要額外引入 poi-scratchpad 依賴(提供 HWPF 相關類)才能解析

<!-- Apache POI core module. -->
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi</artifactId>
    <version>5.0.0</version>
</dependency>
<!-- OOXML support; provides the org.apache.poi.xwpf classes (XWPFDocument) used for .docx files. -->
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml</artifactId>
    <version>5.0.0</version>
</dependency>
<!-- Scratchpad module; provides the org.apache.poi.hwpf classes (HWPFDocument) needed for legacy .doc files. -->
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-scratchpad</artifactId>
    <version>5.0.0</version>
</dependency>

  

需要匯入的資源:

import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.usermodel.CharacterProperties;
import org.apache.poi.hwpf.usermodel.CharacterRun;
import org.apache.poi.hwpf.usermodel.Paragraph;
import org.apache.poi.hwpf.usermodel.Range;

  

三、工具類實現

package cn.cloud9.word;

import com.alibaba.druid.util.StringUtils;
import lombok.*;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.usermodel.CharacterProperties;
import org.apache.poi.hwpf.usermodel.CharacterRun;
import org.apache.poi.hwpf.usermodel.Paragraph;
import org.apache.poi.hwpf.usermodel.Range;
import org.apache.poi.xwpf.usermodel.XWPFDocument;

import java.io.File;
import java.io.FileInputStream;
import java.util.*;
import java.util.stream.Collectors;

/**
 * Turns a legacy {@code .doc} question bank into a "questions only" copy whose answers
 * are collected into an answer sheet inserted at the very top of the document.
 *
 * <p>Each paragraph is classified by its leading token (the text before the first
 * {@code .}): a positive integer starts a new question, a letter {@code A}-{@code G}
 * is an option, and a paragraph starting with one of the answer prefixes is the answer
 * line. Answer paragraphs are deleted from the document body.
 */
public class ExamUtil {
    /** Prefixes that mark a paragraph as an answer line. */
    private static final List<String> ANSWER_PREFIX = Arrays.asList("答案:", "參考答案:");
    /** Leading tokens that mark a paragraph as an option line. */
    private static final List<String> OPTIONS = Arrays.asList("A", "B", "C", "D", "E", "F", "G");
    /** A question number: a positive integer without leading zeros. */
    private static final String NUMBER_REGEXP = "^[1-9]\\d*";
    /** Regex used to split a paragraph on the dot that follows the number / option letter. */
    private static final String SPLIT_IDENTIFY = "\\.";
    /** How many "no.answer" entries are written on one line of the answer sheet. */
    private static final int ANSWERS_PER_ROW = 10;

    /** One classified paragraph, tagged with the running index of the question it belongs to. */
    @Data
    @AllArgsConstructor
    @NoArgsConstructor
    @Builder
    @ToString
    public static final class RoughItem {
        public int serial;
        public String exCode;
        public String content;
    }

    /** One assembled question: its printed number, its title text and its answer. */
    @Data
    @AllArgsConstructor
    @NoArgsConstructor
    @Builder
    @ToString
    public static final class ExamItem {
        public String no;
        public String title;
        public String type;
        public String answer;
        public String explain;
    }

    /**
     * Loads a {@code .docx} file.
     *
     * @param path file system path of the document
     * @return the parsed document
     */
    @SneakyThrows
    public static XWPFDocument getWordFileDocxType(String path) {
        // try-with-resources closes the stream even when the POI constructor throws.
        try (FileInputStream fileInputStream = new FileInputStream(path)) {
            return new XWPFDocument(fileInputStream);
        }
    }

    /**
     * Loads a legacy {@code .doc} file.
     *
     * @param path file system path of the document
     * @return the parsed document
     */
    @SneakyThrows
    public static HWPFDocument getWordFileDocType(String path) {
        // try-with-resources closes the stream even when the POI constructor throws.
        try (FileInputStream fileInputStream = new FileInputStream(path)) {
            return new HWPFDocument(fileInputStream);
        }
    }

    /**
     * Reads the hard-coded source document, strips the answer paragraphs, prepends the
     * answer sheet and writes the result to a new, timestamped file next to the source.
     *
     * @param args unused
     */
    @SneakyThrows
    public static void main(String[] args) {
        String filePath = "C:\\Users\\Administrator\\Documents\\Tencent Files\\1791255334\\FileRecv\\答案  (增加 1301-2100共 800)中級保育師增加題庫 .doc";
        String newFilePath = "C:\\Users\\Administrator\\Documents\\Tencent Files\\1791255334\\FileRecv\\答案  (增加 1301-2100共 800)中級保育師增加題庫 " + System.currentTimeMillis() + ".doc";

        try (HWPFDocument wordFile = getWordFileDocType(filePath)) {
            Range range = wordFile.getRange();

            List<RoughItem> roughItems = collectRoughItems(range);
            List<ExamItem> examItems = toExamItems(roughItems);
            examItems.forEach(System.out::println);

            /* insertBefore always writes at the very top, so rows are inserted last-to-first. */
            List<String> rows = buildAnswerRows(examItems, ANSWERS_PER_ROW);
            for (int rowIdx = rows.size() - 1; rowIdx >= 0; rowIdx--) {
                range.insertBefore(rows.get(rowIdx));
            }

            wordFile.write(new File(newFilePath));
        }
    }

    /** Walks every paragraph, records question / option / answer lines and deletes answer paragraphs. */
    private static List<RoughItem> collectRoughItems(Range range) {
        List<RoughItem> roughItems = new ArrayList<>();
        int examCount = 0;
        String exCode = "";
        /* NOTE(review): the count is read once, as before; paragraphs are deleted while iterating. */
        int numParagraphs = range.numParagraphs();

        for (int i = 0; i < numParagraphs; i++) {
            Paragraph paragraph = range.getParagraph(i);
            String text = paragraph.text();
            if (StringUtils.isEmpty(text)) {
                continue;
            }
            String head = leadingToken(text);
            boolean isExamNo = head.matches(NUMBER_REGEXP);
            boolean isAnswer = isAnswerLine(text);
            boolean isOptions = OPTIONS.contains(head);

            /* A numbered paragraph starts a new question. */
            if (isExamNo) {
                ++examCount;
                exCode = head;
            }
            /* Question, option and answer lines are all stored under the current question. */
            if (isExamNo || isAnswer || isOptions) {
                roughItems.add(RoughItem.builder()
                        .serial(examCount)
                        .content(text)
                        .exCode(exCode)
                        .build());
            }
            /* The answer occupies a whole paragraph, so removing the paragraph removes the answer. */
            if (isAnswer) {
                paragraph.delete();
            }
        }
        return roughItems;
    }

    /** Groups the collected lines per question (in question order) and extracts title and answer. */
    private static List<ExamItem> toExamItems(List<RoughItem> roughItems) {
        /* TreeMap keeps the groups ordered by serial; a plain HashMap gives no ordering guarantee. */
        Map<Integer, List<RoughItem>> grouped = roughItems.stream()
                .collect(Collectors.groupingBy(RoughItem::getSerial, TreeMap::new, Collectors.toList()));

        List<ExamItem> examItems = new ArrayList<>(grouped.size());
        for (List<RoughItem> group : grouped.values()) {
            /* The first line recorded for a question is its title. */
            RoughItem titleItem = group.get(0);
            /* The answer line may be missing, hence the empty-string default. */
            String answer = group.stream()
                    .map(RoughItem::getContent)
                    .filter(ExamUtil::isAnswerLine)
                    .map(ExamUtil::stripAnswerPrefix)
                    .findFirst()
                    .orElse("");
            examItems.add(ExamItem.builder()
                    .no(titleItem.getExCode())
                    .title(titleItem.getContent().replace("\r", ""))
                    .type(null)
                    .answer(answer.replace("\r", ""))
                    .explain(null)
                    .build());
        }
        return examItems;
    }

    /** Formats the answers as lines of at most {@code rowSize} "no.answer " entries, each ending in {@code \r}. */
    private static List<String> buildAnswerRows(List<ExamItem> examItems, int rowSize) {
        List<String> rows = new ArrayList<>();
        for (int begin = 0; begin < examItems.size(); begin += rowSize) {
            int end = Math.min(begin + rowSize, examItems.size());
            StringBuilder rowText = new StringBuilder();
            for (ExamItem examItem : examItems.subList(begin, end)) {
                rowText.append(examItem.getNo()).append(".").append(examItem.getAnswer()).append(" ");
            }
            rows.add(rowText.append("\r").toString());
        }
        return rows;
    }

    /** Returns the text before the first dot, or "" when the paragraph consists of dots only. */
    private static String leadingToken(String text) {
        String[] parts = text.split(SPLIT_IDENTIFY);
        return parts.length == 0 ? "" : parts[0];
    }

    /** Tells whether the paragraph starts with one of the known answer prefixes. */
    private static boolean isAnswerLine(String text) {
        return ANSWER_PREFIX.stream().anyMatch(text::startsWith);
    }

    /** Removes the leading answer prefix only; later occurrences inside the answer are kept. */
    private static String stripAnswerPrefix(String text) {
        for (String prefix : ANSWER_PREFIX) {
            if (text.startsWith(prefix)) {
                return text.substring(prefix.length());
            }
        }
        return text;
    }
}

  

四、答案巢狀在題目裡的處理

答案巢狀在選項或題目之中,需要另外編寫判斷邏輯

為了處理這種型別的題庫文件,單開了一個新的工具類處理

細節部分看程式碼實現就行

package cn.cloud9.word;

import com.alibaba.druid.util.StringUtils;
import lombok.*;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.usermodel.CharacterProperties;
import org.apache.poi.hwpf.usermodel.CharacterRun;
import org.apache.poi.hwpf.usermodel.Paragraph;
import org.apache.poi.hwpf.usermodel.Range;
import org.apache.poi.xwpf.usermodel.XWPFDocument;

import java.io.File;
import java.io.FileInputStream;
import java.util.*;
import java.util.stream.Collectors;

/**
 * Variant of the question-bank converter for documents whose answers are embedded in
 * the option or question text instead of sitting in a separate "answer" paragraph.
 *
 * <p>Three layouts are recognised: an option line carrying a "correct answer" marker,
 * a numbered question line containing a true/false mark, and a numbered question line
 * containing an option letter. The marker is removed from the paragraph and the answers
 * are written as an answer sheet at the top of the document.
 */
public class ExamUtil2 {
    /** Markers appended to the correct option. */
    private static final List<String> ANSWER_IDENT = Arrays.asList("(正確答案)", "【正確答案】");
    /** True/false marks found inside a question line. */
    private static final List<String> ANSWER_IDENT2 = Arrays.asList("×", "√");
    /** Option letters that may appear inside a question line as its answer. */
    private static final List<String> ANSWER_IDENT3 = Arrays.asList("A", "B", "C", "D", "E", "F", "G");
    /** Leading tokens (text before the first dot) that mark an option line. */
    private static final List<String> OPTIONS = Arrays.asList("A", "B", "C", "D", "E", "F", "G");
    /** Alternative option markers that may appear anywhere in a paragraph. */
    private static final List<String> OPTIONS2 = Arrays.asList("A、", "B、", "C、", "D、", "E、", "F、", "G、");
    /** A question number: a positive integer without leading zeros. */
    private static final String NUMBER_REGEXP = "^[1-9]\\d*";
    /** Regex used to split a paragraph on the dot that follows the number / option letter. */
    private static final String SPLIT_IDENTIFY = "\\.";
    /** How many "no.answer" entries are written on one line of the answer sheet. */
    private static final int ANSWERS_PER_ROW = 10;

    /** One recorded line (question title or extracted answer), tagged with its question index. */
    @Data
    @AllArgsConstructor
    @NoArgsConstructor
    @Builder
    @ToString
    public static final class RoughItem {
        public int serial;
        public String exCode;
        public String content;
    }

    /** One assembled question: its printed number, its title text and its answer. */
    @Data
    @AllArgsConstructor
    @NoArgsConstructor
    @Builder
    @ToString
    public static final class ExamItem {
        public String no;
        public String title;
        public String type;
        public String answer;
        public String explain;
    }

    /**
     * Loads a {@code .docx} file.
     *
     * @param path file system path of the document
     * @return the parsed document
     */
    @SneakyThrows
    public static XWPFDocument getWordFileDocxType(String path) {
        // try-with-resources closes the stream even when the POI constructor throws.
        try (FileInputStream fileInputStream = new FileInputStream(path)) {
            return new XWPFDocument(fileInputStream);
        }
    }

    /**
     * Loads a legacy {@code .doc} file.
     *
     * @param path file system path of the document
     * @return the parsed document
     */
    @SneakyThrows
    public static HWPFDocument getWordFileDocType(String path) {
        // try-with-resources closes the stream even when the POI constructor throws.
        try (FileInputStream fileInputStream = new FileInputStream(path)) {
            return new HWPFDocument(fileInputStream);
        }
    }

    /**
     * Reads the hard-coded source document, removes the embedded answer markers, prepends
     * the answer sheet and writes the result to a new, timestamped file next to the source.
     *
     * @param args unused
     */
    @SneakyThrows
    public static void main(String[] args) {
        String filePath = "C:\\Users\\Administrator\\Documents\\Tencent Files\\1791255334\\FileRecv\\11 (   )高階保育師理論題庫增加.doc";
        String newFilePath = "C:\\Users\\Administrator\\Documents\\Tencent Files\\1791255334\\FileRecv\\11 (   )高階保育師理論題庫增加- " + System.currentTimeMillis() + ".doc";

        try (HWPFDocument wordFile = getWordFileDocType(filePath)) {
            Range range = wordFile.getRange();

            List<RoughItem> roughItems = collectRoughItems(range);
            List<ExamItem> examItems = toExamItems(roughItems);
            examItems.forEach(System.out::println);

            /* insertBefore always writes at the very top, so rows are inserted last-to-first. */
            List<String> rows = buildAnswerRows(examItems, ANSWERS_PER_ROW);
            for (int rowIdx = rows.size() - 1; rowIdx >= 0; rowIdx--) {
                range.insertBefore(rows.get(rowIdx));
            }

            wordFile.write(new File(newFilePath));
        }
    }

    /** Walks every paragraph, records question titles and extracted answers, and strips answer markers. */
    private static List<RoughItem> collectRoughItems(Range range) {
        List<RoughItem> roughItems = new ArrayList<>();
        int examCount = 0;
        String exCode = "";
        int numParagraphs = range.numParagraphs();

        for (int i = 0; i < numParagraphs; i++) {
            Paragraph paragraph = range.getParagraph(i);
            String text = paragraph.text();
            if (StringUtils.isEmpty(text)) {
                continue;
            }

            String head = leadingToken(text);
            boolean isExamNo = head.matches(NUMBER_REGEXP);
            boolean isOptions = OPTIONS.contains(head) || OPTIONS2.stream().anyMatch(text::contains);
            /* Answer marked on an option line. */
            boolean rightOption = isOptions && ANSWER_IDENT.stream().anyMatch(text::contains);
            /* True/false mark placed inside the question line. */
            boolean rightOption2 = isExamNo && ANSWER_IDENT2.stream().anyMatch(text::contains);
            /* Option letter placed inside the question line. */
            boolean rightOption3 = isExamNo && ANSWER_IDENT3.stream().anyMatch(text::contains);
            boolean isAnswer = rightOption || rightOption2 || rightOption3;

            /* A numbered paragraph starts a new question; its unmodified text is kept as the title. */
            if (isExamNo) {
                ++examCount;
                exCode = head;
                roughItems.add(RoughItem.builder()
                        .serial(examCount)
                        .content(text)
                        .exCode(exCode)
                        .build());
            }
            if (!isAnswer) {
                continue;
            }

            String correctOption = "";
            String stripped = text;
            if (rightOption) {
                /* Literal replace: the markers contain parentheses, which a regex would read as a group. */
                for (String marker : ANSWER_IDENT) {
                    stripped = stripped.replace(marker, "");
                }
                paragraph.replaceText(stripped, false);
                /* NOTE(review): assumes the option letter is the first character of the line. */
                correctOption = String.valueOf(stripped.charAt(0));
            }
            if (rightOption2) {
                correctOption = stripped.contains(ANSWER_IDENT2.get(0)) ? ANSWER_IDENT2.get(0) : ANSWER_IDENT2.get(1);
                for (String mark : ANSWER_IDENT2) {
                    stripped = stripped.replace(mark, "");
                }
                paragraph.replaceText(stripped, false);
            }
            if (rightOption3) {
                /* NOTE(review): removes every occurrence of the first matching letter, as before. */
                for (String option : ANSWER_IDENT3) {
                    if (stripped.contains(option)) {
                        correctOption = option;
                        stripped = stripped.replace(option, "");
                        break;
                    }
                }
                paragraph.replaceText(stripped, false);
            }
            roughItems.add(RoughItem.builder()
                    .serial(examCount)
                    .content(correctOption)
                    .exCode(exCode)
                    .build());
        }
        return roughItems;
    }

    /** Groups the collected lines per question (in question order) and pairs each title with its answer. */
    private static List<ExamItem> toExamItems(List<RoughItem> roughItems) {
        /* TreeMap keeps the groups ordered by serial; a plain HashMap gives no ordering guarantee. */
        Map<Integer, List<RoughItem>> grouped = roughItems.stream()
                .collect(Collectors.groupingBy(RoughItem::getSerial, TreeMap::new, Collectors.toList()));

        List<ExamItem> examItems = new ArrayList<>(grouped.size());
        for (List<RoughItem> group : grouped.values()) {
            /* A group with a single entry has no extracted answer and is skipped. */
            if (group.size() == 1) {
                continue;
            }
            /* The first entry is used as the title. */
            RoughItem titleItem = group.get(0);
            /* NOTE(review): only the first extracted answer is used; further ones are ignored, as before. */
            String answer = group.get(1).getContent();
            examItems.add(ExamItem.builder()
                    .no(titleItem.getExCode())
                    .title(titleItem.getContent().replace("\r", ""))
                    .type(null)
                    .answer(answer.replace("\r", ""))
                    .explain(null)
                    .build());
        }
        return examItems;
    }

    /** Formats the answers as lines of at most {@code rowSize} "no.answer " entries, each ending in {@code \r}. */
    private static List<String> buildAnswerRows(List<ExamItem> examItems, int rowSize) {
        List<String> rows = new ArrayList<>();
        for (int begin = 0; begin < examItems.size(); begin += rowSize) {
            int end = Math.min(begin + rowSize, examItems.size());
            StringBuilder rowText = new StringBuilder();
            for (ExamItem examItem : examItems.subList(begin, end)) {
                rowText.append(examItem.getNo()).append(".").append(examItem.getAnswer()).append(" ");
            }
            rows.add(rowText.append("\r").toString());
        }
        return rows;
    }

    /** Returns the text before the first dot, or "" when the paragraph consists of dots only. */
    private static String leadingToken(String text) {
        String[] parts = text.split(SPLIT_IDENTIFY);
        return parts.length == 0 ? "" : parts[0];
    }
}

  

相關文章