初稿見:https://www.cnblogs.com/mindzone/p/18362194
一、新增需求
在原稿題庫之後,還需要生成一份純題目 + 純答案
答案放在開頭,題目裡面去掉答案
在檢查題型時還發現部分內容略有區別:
部分題目的答案是以「參考答案:」開頭,而不是「答案:」,所以在判斷某個段落是否為答案時,需要同時相容這兩種字首
二、關於老版本支援
舊版的 .doc 格式(例如 Word 2000 的文件)需要額外加入 poi-scratchpad 這個依賴庫才能讀取
<dependency> <groupId>org.apache.poi</groupId> <artifactId>poi</artifactId> <version>5.0.0</version> </dependency> <dependency> <groupId>org.apache.poi</groupId> <artifactId>poi-ooxml</artifactId> <version>5.0.0</version> </dependency> <dependency> <groupId>org.apache.poi</groupId> <artifactId>poi-scratchpad</artifactId> <version>5.0.0</version> </dependency>
需要匯入的資源:
import org.apache.poi.hwpf.HWPFDocument; import org.apache.poi.hwpf.usermodel.CharacterProperties; import org.apache.poi.hwpf.usermodel.CharacterRun; import org.apache.poi.hwpf.usermodel.Paragraph; import org.apache.poi.hwpf.usermodel.Range;
三、工具類實現
package cn.cloud9.word;

import com.alibaba.druid.util.StringUtils;
import lombok.*;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.usermodel.CharacterProperties;
import org.apache.poi.hwpf.usermodel.CharacterRun;
import org.apache.poi.hwpf.usermodel.Paragraph;
import org.apache.poi.hwpf.usermodel.Range;
import org.apache.poi.xwpf.usermodel.XWPFDocument;

import java.io.File;
import java.io.FileInputStream;
import java.util.*;
import java.util.stream.Collectors;

/**
 * Converts a legacy {@code .doc} question bank into a "questions only" document
 * with an answer sheet placed at the very beginning.
 *
 * <p>Every paragraph that starts with one of the {@link #ANSWER_PREFIX} values is
 * treated as the answer of the most recently seen numbered question. Those
 * paragraphs are removed from the document body and their content is collected
 * into rows that are inserted in front of the document.
 */
public class ExamUtil {

    /** Prefixes that mark a paragraph as an answer line. */
    private static final List<String> ANSWER_PREFIX = Arrays.asList("答案:", "參考答案:");

    /** Option letters; a paragraph whose leading token equals one of these is an option line. */
    private static final List<String> OPTIONS = Arrays.asList("A", "B", "C", "D", "E", "F", "G");

    /** A question number: a positive integer without leading zeros. */
    private static final String NUMBER_REGEXP = "^[1-9]\\d*";

    /** Regex for the separator between the question number / option letter and the text. */
    private static final String SPLIT_IDENTIFY = "\\.";

    /** Number of answers written on each row of the answer sheet. */
    private static final int ANSWERS_PER_ROW = 10;

    /** One relevant paragraph of the source document, tagged with the question it belongs to. */
    @Data
    @AllArgsConstructor
    @NoArgsConstructor
    @Builder
    @ToString
    public static final class RoughItem {
        /** Running index of the question this paragraph belongs to (0 before the first question). */
        public int serial;
        /** Question number exactly as printed in the document. */
        public String exCode;
        /** Raw paragraph text. */
        public String content;
    }

    /** A question together with its extracted answer. */
    @Data
    @AllArgsConstructor
    @NoArgsConstructor
    @Builder
    @ToString
    public static final class ExamItem {
        public String no;
        public String title;
        public String type;
        public String answer;
        public String explain;
    }

    /**
     * Loads a {@code .docx} file.
     *
     * @param path path of the file to read
     * @return the parsed document
     */
    @SneakyThrows
    public static XWPFDocument getWordFileDocxType(String path) {
        // try-with-resources closes the stream even when parsing fails.
        try (FileInputStream in = new FileInputStream(path)) {
            return new XWPFDocument(in);
        }
    }

    /**
     * Loads a legacy {@code .doc} file.
     *
     * @param path path of the file to read
     * @return the parsed document
     */
    @SneakyThrows
    public static HWPFDocument getWordFileDocType(String path) {
        // try-with-resources closes the stream even when parsing fails.
        try (FileInputStream in = new FileInputStream(path)) {
            return new HWPFDocument(in);
        }
    }

    /**
     * Reads the hard-coded source document, strips the answer paragraphs, writes the
     * answer sheet in front of the document and saves the result under a new,
     * timestamped file name.
     *
     * @param args unused
     */
    @SneakyThrows
    public static void main(String[] args) {
        // The source listing had a line break inside this literal; it is rejoined here
        // to match the file name used for newFilePath.
        String filePath = "C:\\Users\\Administrator\\Documents\\Tencent Files\\1791255334\\FileRecv\\答案 (增加 1301-2100共 800)中級保育師增加題庫 .doc";
        String newFilePath = "C:\\Users\\Administrator\\Documents\\Tencent Files\\1791255334\\FileRecv\\答案 (增加 1301-2100共 800)中級保育師增加題庫 " + System.currentTimeMillis() + ".doc";

        try (HWPFDocument wordFile = getWordFileDocType(filePath)) {
            Range range = wordFile.getRange();

            List<RoughItem> roughItems = collectRoughItems(range);
            List<ExamItem> examItems = toExamItems(roughItems);
            examItems.forEach(System.out::println);

            // insertBefore always writes at the start of the document, so the rows
            // are inserted last-to-first to end up in reading order.
            List<String> rows = buildAnswerRows(examItems, ANSWERS_PER_ROW);
            for (int i = rows.size() - 1; i >= 0; i--) {
                range.insertBefore(rows.get(i));
            }

            wordFile.write(new File(newFilePath));
        }
    }

    /**
     * Walks all paragraphs, records question, option and answer lines, and deletes
     * the answer paragraphs from the document.
     */
    private static List<RoughItem> collectRoughItems(Range range) {
        List<RoughItem> roughItems = new ArrayList<>();
        int examCount = 0;
        String exCode = "";

        int numParagraphs = range.numParagraphs();
        for (int i = 0; i < numParagraphs; i++) {
            Paragraph paragraph = range.getParagraph(i);
            String text = paragraph.text();
            if (StringUtils.isEmpty(text)) {
                continue;
            }

            String head = leadingToken(text);
            boolean isExamNo = head.matches(NUMBER_REGEXP);
            boolean isAnswer = isAnswerLine(text);
            boolean isOptions = OPTIONS.contains(head);

            // A numbered paragraph starts a new question.
            if (isExamNo) {
                ++examCount;
                exCode = head;
            }

            // Question, option and answer lines are all attached to the current question.
            if (isExamNo || isAnswer || isOptions) {
                roughItems.add(RoughItem.builder()
                        .serial(examCount)
                        .content(text)
                        .exCode(exCode)
                        .build());
            }

            // An answer occupies a whole paragraph, so removing the paragraph removes the answer.
            if (isAnswer) {
                paragraph.delete();
            }
        }
        return roughItems;
    }

    /**
     * Returns the part of {@code text} before the first separator, or an empty
     * string when the text consists only of separators.
     */
    private static String leadingToken(String text) {
        String[] parts = text.split(SPLIT_IDENTIFY);
        return parts.length == 0 ? "" : parts[0];
    }

    /** Tells whether the paragraph text starts with one of the answer prefixes. */
    private static boolean isAnswerLine(String text) {
        return ANSWER_PREFIX.stream().anyMatch(text::startsWith);
    }

    /** Groups the collected paragraphs per question and extracts title and answer. */
    private static List<ExamItem> toExamItems(List<RoughItem> roughItems) {
        // A TreeMap keeps the questions in document order; a plain HashMap gives no such guarantee.
        Map<Integer, List<RoughItem>> bySerial = roughItems.stream()
                .collect(Collectors.groupingBy(RoughItem::getSerial, TreeMap::new, Collectors.toList()));

        List<ExamItem> examItems = new ArrayList<>();
        for (List<RoughItem> group : bySerial.values()) {
            // The first paragraph of a group is taken as the question text.
            RoughItem titleItem = group.get(0);
            String title = titleItem.getContent().replace("\r", "");

            // The answer may be missing, in which case an empty string is used.
            // The longer prefix is removed first so that it is not cut in half
            // by the shorter one.
            String answer = group.stream()
                    .map(RoughItem::getContent)
                    .filter(ExamUtil::isAnswerLine)
                    .map(raw -> raw.replace(ANSWER_PREFIX.get(1), "").replace(ANSWER_PREFIX.get(0), ""))
                    .findFirst()
                    .orElse("")
                    .replace("\r", "");

            examItems.add(ExamItem.builder()
                    .no(titleItem.getExCode())
                    .title(title)
                    .type(null)
                    .answer(answer)
                    .explain(null)
                    .build());
        }
        return examItems;
    }

    /**
     * Formats the answers as rows of at most {@code rowSize} entries, each entry
     * being {@code "<no>.<answer> "} and each row ending with a paragraph mark.
     */
    private static List<String> buildAnswerRows(List<ExamItem> examItems, int rowSize) {
        List<String> rows = new ArrayList<>();
        for (int begin = 0; begin < examItems.size(); begin += rowSize) {
            int end = Math.min(begin + rowSize, examItems.size());
            StringBuilder rowText = new StringBuilder();
            for (ExamItem examItem : examItems.subList(begin, end)) {
                rowText.append(examItem.getNo()).append(".").append(examItem.getAnswer()).append(" ");
            }
            rowText.append("\r");
            rows.add(rowText.toString());
        }
        return rows;
    }
}
四、答案巢狀在題目裡的處理
答案巢狀在選項或題目文字中,需要另外撰寫邏輯來判斷
為了處理這種型別的題庫文件,單開了一個新的工具類處理
細節部分看程式碼實現就行
package cn.cloud9.word;

import com.alibaba.druid.util.StringUtils;
import lombok.*;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.usermodel.CharacterProperties;
import org.apache.poi.hwpf.usermodel.CharacterRun;
import org.apache.poi.hwpf.usermodel.Paragraph;
import org.apache.poi.hwpf.usermodel.Range;
import org.apache.poi.xwpf.usermodel.XWPFDocument;

import java.io.File;
import java.io.FileInputStream;
import java.util.*;
import java.util.stream.Collectors;

/**
 * Variant of the question-bank converter for documents in which the answer is
 * embedded in the question text or in an option line instead of standing in a
 * paragraph of its own.
 *
 * <p>The answer markers are removed from the affected paragraphs and the collected
 * answers are written as an answer sheet in front of the document.
 */
public class ExamUtil2 {

    /** Markers appended to the correct option line. */
    private static final List<String> ANSWER_IDENT = Arrays.asList("(正確答案)", "【正確答案】");

    /** True/false marks placed inside the question text. */
    private static final List<String> ANSWER_IDENT2 = Arrays.asList("×", "√");

    /** Option letters placed inside the question text as the answer. */
    private static final List<String> ANSWER_IDENT3 = Arrays.asList("A", "B", "C", "D", "E", "F", "G");

    /** Option letters; a paragraph whose leading token equals one of these is an option line. */
    private static final List<String> OPTIONS = Arrays.asList("A", "B", "C", "D", "E", "F", "G");

    /** Alternative option labels; a paragraph containing one of these is an option line. */
    private static final List<String> OPTIONS2 = Arrays.asList("A、", "B、", "C、", "D、", "E、", "F、", "G、");

    /** A question number: a positive integer without leading zeros. */
    private static final String NUMBER_REGEXP = "^[1-9]\\d*";

    /** Regex for the separator between the question number / option letter and the text. */
    private static final String SPLIT_IDENTIFY = "\\.";

    /** Number of answers written on each row of the answer sheet. */
    private static final int ANSWERS_PER_ROW = 10;

    /** One collected entry (question text or extracted answer), tagged with its question. */
    @Data
    @AllArgsConstructor
    @NoArgsConstructor
    @Builder
    @ToString
    public static final class RoughItem {
        /** Running index of the question this entry belongs to (0 before the first question). */
        public int serial;
        /** Question number exactly as printed in the document. */
        public String exCode;
        /** Raw question text, or the extracted answer. */
        public String content;
    }

    /** A question together with its extracted answer. */
    @Data
    @AllArgsConstructor
    @NoArgsConstructor
    @Builder
    @ToString
    public static final class ExamItem {
        public String no;
        public String title;
        public String type;
        public String answer;
        public String explain;
    }

    /**
     * Loads a {@code .docx} file.
     *
     * @param path path of the file to read
     * @return the parsed document
     */
    @SneakyThrows
    public static XWPFDocument getWordFileDocxType(String path) {
        // try-with-resources closes the stream even when parsing fails.
        try (FileInputStream in = new FileInputStream(path)) {
            return new XWPFDocument(in);
        }
    }

    /**
     * Loads a legacy {@code .doc} file.
     *
     * @param path path of the file to read
     * @return the parsed document
     */
    @SneakyThrows
    public static HWPFDocument getWordFileDocType(String path) {
        // try-with-resources closes the stream even when parsing fails.
        try (FileInputStream in = new FileInputStream(path)) {
            return new HWPFDocument(in);
        }
    }

    /**
     * Reads the hard-coded source document, strips the embedded answers, writes the
     * answer sheet in front of the document and saves the result under a new,
     * timestamped file name.
     *
     * @param args unused
     */
    @SneakyThrows
    public static void main(String[] args) {
        String filePath = "C:\\Users\\Administrator\\Documents\\Tencent Files\\1791255334\\FileRecv\\11 ( )高階保育師理論題庫增加.doc";
        String newFilePath = "C:\\Users\\Administrator\\Documents\\Tencent Files\\1791255334\\FileRecv\\11 ( )高階保育師理論題庫增加- " + System.currentTimeMillis() + ".doc";

        try (HWPFDocument wordFile = getWordFileDocType(filePath)) {
            Range range = wordFile.getRange();

            List<RoughItem> roughItems = collectRoughItems(range);
            List<ExamItem> examItems = toExamItems(roughItems);
            examItems.forEach(System.out::println);

            // insertBefore always writes at the start of the document, so the rows
            // are inserted last-to-first to end up in reading order.
            List<String> rows = buildAnswerRows(examItems, ANSWERS_PER_ROW);
            for (int i = rows.size() - 1; i >= 0; i--) {
                range.insertBefore(rows.get(i));
            }

            wordFile.write(new File(newFilePath));
        }
    }

    /**
     * Walks all paragraphs, records each question and the answer embedded in it or
     * in its options, and rewrites the affected paragraphs without the answer.
     */
    private static List<RoughItem> collectRoughItems(Range range) {
        List<RoughItem> roughItems = new ArrayList<>();
        int examCount = 0;
        String exCode = "";

        int numParagraphs = range.numParagraphs();
        for (int i = 0; i < numParagraphs; i++) {
            Paragraph paragraph = range.getParagraph(i);
            String text = paragraph.text();
            if (StringUtils.isEmpty(text)) {
                continue;
            }

            String head = leadingToken(text);
            boolean isExamNo = head.matches(NUMBER_REGEXP);
            boolean isOptions = OPTIONS.contains(head) || OPTIONS2.stream().anyMatch(text::contains);

            // The answer is marked on an option line.
            boolean rightOption = ANSWER_IDENT.stream().anyMatch(text::contains) && isOptions;
            // The answer is a true/false mark inside the question text.
            boolean rightOption2 = ANSWER_IDENT2.stream().anyMatch(text::contains) && isExamNo;
            // The answer is an option letter inside the question text.
            boolean rightOption3 = ANSWER_IDENT3.stream().anyMatch(text::contains) && isExamNo;
            boolean isAnswer = rightOption || rightOption2 || rightOption3;

            // A numbered paragraph starts a new question; its original text is kept as the title.
            if (isExamNo) {
                ++examCount;
                exCode = head;
                roughItems.add(RoughItem.builder()
                        .serial(examCount)
                        .content(text)
                        .exCode(exCode)
                        .build());
            }

            if (!isAnswer) {
                continue;
            }

            String correctOption = "";
            if (rightOption) {
                // The markers are literal text. String.replace is used because
                // replaceAll would parse the parenthesised marker as a regex group
                // and leave the brackets behind in the paragraph.
                for (String marker : ANSWER_IDENT) {
                    text = text.replace(marker, "");
                }
                paragraph.replaceText(text, false);
                // The option letter is the first character of the option line.
                correctOption = text.isEmpty() ? "" : String.valueOf(text.charAt(0));
            }
            if (rightOption2) {
                correctOption = text.contains(ANSWER_IDENT2.get(0))
                        ? ANSWER_IDENT2.get(0)
                        : ANSWER_IDENT2.get(1);
                for (String mark : ANSWER_IDENT2) {
                    text = text.replace(mark, "");
                }
                paragraph.replaceText(text, false);
            }
            if (rightOption3) {
                // Only the first matching letter (in A..G order) is taken as the answer.
                for (String option : ANSWER_IDENT3) {
                    if (text.contains(option)) {
                        correctOption = option;
                        text = text.replace(option, "");
                        break;
                    }
                }
                paragraph.replaceText(text, false);
            }

            roughItems.add(RoughItem.builder()
                    .serial(examCount)
                    .content(correctOption)
                    .exCode(exCode)
                    .build());
        }
        return roughItems;
    }

    /**
     * Returns the part of {@code text} before the first separator, or an empty
     * string when the text consists only of separators.
     */
    private static String leadingToken(String text) {
        String[] parts = text.split(SPLIT_IDENTIFY);
        return parts.length == 0 ? "" : parts[0];
    }

    /** Groups the collected entries per question and pairs each title with its answer. */
    private static List<ExamItem> toExamItems(List<RoughItem> roughItems) {
        // A TreeMap keeps the questions in document order; a plain HashMap gives no such guarantee.
        Map<Integer, List<RoughItem>> bySerial = roughItems.stream()
                .collect(Collectors.groupingBy(RoughItem::getSerial, TreeMap::new, Collectors.toList()));

        List<ExamItem> examItems = new ArrayList<>();
        for (List<RoughItem> group : bySerial.values()) {
            // A group with a single entry has no answer and is left out of the answer sheet.
            if (group.size() < 2) {
                continue;
            }

            // The first entry of a group is taken as the question text, the second as its answer.
            RoughItem titleItem = group.get(0);
            String title = titleItem.getContent().replace("\r", "");
            String answer = group.get(1).getContent().replace("\r", "");

            examItems.add(ExamItem.builder()
                    .no(titleItem.getExCode())
                    .title(title)
                    .type(null)
                    .answer(answer)
                    .explain(null)
                    .build());
        }
        return examItems;
    }

    /**
     * Formats the answers as rows of at most {@code rowSize} entries, each entry
     * being {@code "<no>.<answer> "} and each row ending with a paragraph mark.
     */
    private static List<String> buildAnswerRows(List<ExamItem> examItems, int rowSize) {
        List<String> rows = new ArrayList<>();
        for (int begin = 0; begin < examItems.size(); begin += rowSize) {
            int end = Math.min(begin + rowSize, examItems.size());
            StringBuilder rowText = new StringBuilder();
            for (ExamItem examItem : examItems.subList(begin, end)) {
                rowText.append(examItem.getNo()).append(".").append(examItem.getAnswer()).append(" ");
            }
            rowText.append("\r");
            rows.add(rowText.toString());
        }
        return rows;
    }
}