|
@@ -5,6 +5,7 @@ import java.nio.file.Files;
|
5
|
5
|
import java.nio.file.Path;
|
6
|
6
|
import java.nio.file.Paths;
|
7
|
7
|
import java.util.*;
|
|
8
|
+import java.util.regex.Pattern;
|
8
|
9
|
|
9
|
10
|
import com.alibaba.fastjson2.JSONObject;
|
10
|
11
|
import com.ruoyi.common.config.RuoYiConfig;
|
|
@@ -37,12 +38,8 @@ import io.milvus.v2.service.vector.request.data.FloatVec;
|
37
|
38
|
import io.milvus.v2.service.vector.response.SearchResp;
|
38
|
39
|
import org.apache.poi.extractor.ExtractorFactory;
|
39
|
40
|
import org.apache.poi.extractor.POITextExtractor;
|
40
|
|
-import org.apache.poi.xwpf.usermodel.BreakType;
|
41
|
|
-import org.apache.poi.xwpf.usermodel.XWPFDocument;
|
42
|
|
-import org.apache.poi.xwpf.usermodel.XWPFParagraph;
|
43
|
|
-import org.apache.poi.xwpf.usermodel.XWPFRun;
|
|
41
|
+import org.apache.poi.xwpf.usermodel.*;
|
44
|
42
|
import org.apache.xmlbeans.XmlCursor;
|
45
|
|
-import org.noear.solon.Solon;
|
46
|
43
|
import org.noear.solon.ai.chat.ChatModel;
|
47
|
44
|
import org.noear.solon.ai.chat.ChatResponse;
|
48
|
45
|
import org.noear.solon.ai.chat.ChatSession;
|
|
@@ -475,7 +472,7 @@ public class CmcAgentServiceImpl implements ICmcAgentService
|
475
|
472
|
String paragraphText = paragraph.getText().trim();
|
476
|
473
|
|
477
|
474
|
for (String title : titles) {
|
478
|
|
- if (paragraphText.contains(title)) {
|
|
475
|
+ if (paragraphText.equals(title)) {
|
479
|
476
|
titlePositions.add(i);
|
480
|
477
|
contents.add(titleContentMap.getOrDefault(title, ""));
|
481
|
478
|
break;
|
|
@@ -486,18 +483,8 @@ public class CmcAgentServiceImpl implements ICmcAgentService
|
486
|
483
|
// 从后往前插入内容,避免位置偏移
|
487
|
484
|
for (int i = titlePositions.size() - 1; i >= 0; i--) {
|
488
|
485
|
int insertPos = titlePositions.get(i) + 1;
|
489
|
|
- if (insertPos < paragraphs.size()) {
|
490
|
|
- XmlCursor xmlCursor = paragraphs.get(insertPos).getCTP().newCursor();
|
491
|
|
- XWPFParagraph contentParagraph = document.insertNewParagraph(xmlCursor);
|
492
|
|
- contentParagraph.setStyle("1"); // 正文样式
|
493
|
|
-
|
494
|
|
- XWPFRun run = contentParagraph.createRun();
|
495
|
|
- String content = contents.get(i);
|
496
|
|
- if (content != null && !content.trim().isEmpty()) {
|
497
|
|
- run.setText(content);
|
498
|
|
- }
|
499
|
|
- }
|
500
|
|
-
|
|
486
|
+ String content = contents.get(i);
|
|
487
|
+ generateWordDocument(content, document, insertPos);
|
501
|
488
|
// 更新进度
|
502
|
489
|
processValue = "章节内容写入: " + Double.parseDouble(String.format("%.2f%n", (double) (i + 1) / titles.size() * 100)) + "%";
|
503
|
490
|
|
|
@@ -578,7 +565,11 @@ public class CmcAgentServiceImpl implements ICmcAgentService
|
578
|
565
|
"6.1.1 XX\n" +
|
579
|
566
|
"6.1.2 XX\n" +
|
580
|
567
|
"6.2 XX\n" +
|
581
|
|
- "6.2.1 XX";
|
|
568
|
+ "6.2.1 XX\n" +
|
|
569
|
+ "6.2.2 XX\n" +
|
|
570
|
+ "6.3 XX\n" +
|
|
571
|
+ "6.3.1 XX\n" +
|
|
572
|
+ "6.3.2 XX\n";
|
582
|
573
|
String content = generateAnswer(sb);
|
583
|
574
|
writeTitles(content, templatePath);
|
584
|
575
|
return content;
|
|
@@ -857,4 +848,245 @@ public class CmcAgentServiceImpl implements ICmcAgentService
|
857
|
848
|
return splitter.split(document);
|
858
|
849
|
}
|
859
|
850
|
|
|
851
|
+ public void generateWordDocument(String content, XWPFDocument document, int insertPos) {
|
|
852
|
+ try {
|
|
853
|
+ // 规范化锚点:在 insertPos 对应段落之前插入;若越界则附加到末尾
|
|
854
|
+ List<XWPFParagraph> paragraphs = document.getParagraphs();
|
|
855
|
+ if (paragraphs.isEmpty()) {
|
|
856
|
+ document.createParagraph();
|
|
857
|
+ paragraphs = document.getParagraphs();
|
|
858
|
+ }
|
|
859
|
+ if (insertPos < 0) {
|
|
860
|
+ insertPos = 0;
|
|
861
|
+ }
|
|
862
|
+ if (insertPos >= paragraphs.size()) {
|
|
863
|
+ // 在文末追加一个空段落作为锚点
|
|
864
|
+ XWPFParagraph tail = document.createParagraph();
|
|
865
|
+ paragraphs = document.getParagraphs();
|
|
866
|
+ insertPos = paragraphs.indexOf(tail);
|
|
867
|
+ }
|
|
868
|
+
|
|
869
|
+ String[] lines = content.split("\n");
|
|
870
|
+ boolean inTable = false;
|
|
871
|
+ List<String> tableLines = new ArrayList<>();
|
|
872
|
+
|
|
873
|
+ // 将内容解析为块(普通段落/表格/标题/列表项等),然后倒序插入,确保顺序正确
|
|
874
|
+ class Block {
|
|
875
|
+ String type; // para | table | h4 | list1 | list2 | imageText
|
|
876
|
+ String text;
|
|
877
|
+ List<String> table;
|
|
878
|
+ Integer h4Index; // 预计算的四级标题编号
|
|
879
|
+ Integer list1Index; // 预计算的一级列表编号 1),2),3)
|
|
880
|
+ Integer list2Index; // 预计算的二级列表编号 (1),(2),(3)
|
|
881
|
+ Block(String t, String x) { type = t; text = x; }
|
|
882
|
+ Block(List<String> tbl) { type = "table"; table = new ArrayList<>(tbl); }
|
|
883
|
+ Block(String t, String x, Integer h4Idx, Integer l1Idx, Integer l2Idx) {
|
|
884
|
+ type = t; text = x; h4Index = h4Idx; list1Index = l1Idx; list2Index = l2Idx;
|
|
885
|
+ }
|
|
886
|
+ }
|
|
887
|
+ List<Block> blocks = new ArrayList<>();
|
|
888
|
+
|
|
889
|
+ // 预计算编号(按正序扫描)
|
|
890
|
+ int h4Counter = 0;
|
|
891
|
+ int list1Counter = 0;
|
|
892
|
+ int list2Counter = 0;
|
|
893
|
+
|
|
894
|
+ for (String raw : lines) {
|
|
895
|
+ String line = raw.trim();
|
|
896
|
+ if (line.isEmpty()) {
|
|
897
|
+ blocks.add(new Block("para", "")); // 空段落
|
|
898
|
+ continue;
|
|
899
|
+ }
|
|
900
|
+
|
|
901
|
+ if (line.startsWith("#")) {
|
|
902
|
+ // 跳过前三级标题
|
|
903
|
+ continue;
|
|
904
|
+ } else if (line.startsWith("####")) {
|
|
905
|
+ // 四级标题,编号采用预计算,保证倒序插入后视觉顺序仍递增
|
|
906
|
+ String title = line.replace("####", "").trim();
|
|
907
|
+ title = title.replaceFirst("^\\d+\\.\\s*", "");
|
|
908
|
+ h4Counter++;
|
|
909
|
+ // 碰到新小节,重置列表编号
|
|
910
|
+ list1Counter = 0;
|
|
911
|
+ list2Counter = 0;
|
|
912
|
+ blocks.add(new Block("h4", title, h4Counter, null, null));
|
|
913
|
+ } else if (line.startsWith("|")) {
|
|
914
|
+ // 表格模式开始
|
|
915
|
+ if (!inTable) {
|
|
916
|
+ inTable = true;
|
|
917
|
+ tableLines.clear();
|
|
918
|
+ }
|
|
919
|
+ tableLines.add(line);
|
|
920
|
+ } else if (inTable && !line.startsWith("|")) {
|
|
921
|
+ // 表格结束,收集为一个块
|
|
922
|
+ inTable = false;
|
|
923
|
+ if (!tableLines.isEmpty()) {
|
|
924
|
+ blocks.add(new Block(new ArrayList<>(tableLines)));
|
|
925
|
+ tableLines.clear();
|
|
926
|
+ }
|
|
927
|
+ // 当前行按普通段落处理
|
|
928
|
+ blocks.add(new Block("para", line));
|
|
929
|
+ } else if (inTable) {
|
|
930
|
+ tableLines.add(line);
|
|
931
|
+ } else if (line.startsWith("- **")) {
|
|
932
|
+ // 一级加粗列表项,编号为 1),2),3) —— 预计算编号
|
|
933
|
+ String listItem = line.replaceFirst("- \\\\*\\\\*", "").replace("**", "");
|
|
934
|
+ listItem = listItem.replaceFirst("^\\*\\*", "").replace("**:", ":");
|
|
935
|
+ list1Counter++;
|
|
936
|
+ // 一级列表开始时,重置二级列表编号
|
|
937
|
+ list2Counter = 0;
|
|
938
|
+ Block b = new Block("list1", listItem, null, list1Counter, null);
|
|
939
|
+ blocks.add(b);
|
|
940
|
+ } else if (line.startsWith(" - **")) {
|
|
941
|
+ // 二级加粗列表项,编号为 (1),(2),(3) —— 预计算编号
|
|
942
|
+ String listItem = line.replaceFirst(" - \\\\*\\\\*", "").replace("**", "");
|
|
943
|
+ listItem = listItem.replaceFirst("^\\*\\*", "").replace("**:", ":");
|
|
944
|
+ list2Counter++;
|
|
945
|
+ Block b = new Block("list2", listItem, null, null, list2Counter);
|
|
946
|
+ blocks.add(b);
|
|
947
|
+ } else if (line.startsWith(" && line.contains(")")) {
|
|
948
|
+ // 图片 markdown,转成说明文本
|
|
949
|
+ String imageText = extractImageDescription(line);
|
|
950
|
+ blocks.add(new Block("imageText", imageText));
|
|
951
|
+ } else if (line.contains("http") && (line.contains(".png") || line.contains(".jpg") || line.contains(".jpeg") || line.contains(".gif"))) {
|
|
952
|
+ blocks.add(new Block("imageText", "图片链接: " + line));
|
|
953
|
+ } else {
|
|
954
|
+ blocks.add(new Block("para", line));
|
|
955
|
+ }
|
|
956
|
+ }
|
|
957
|
+ // 收尾:如果文件最后是表格仍未输出
|
|
958
|
+ if (inTable && !tableLines.isEmpty()) {
|
|
959
|
+ blocks.add(new Block(new ArrayList<>(tableLines)));
|
|
960
|
+ tableLines.clear();
|
|
961
|
+ }
|
|
962
|
+
|
|
963
|
+ // 倒序插入到 insertPos 前(即插在 insertPos 之前,从后往前插保证最终顺序)
|
|
964
|
+ for (int i = blocks.size() - 1; i >= 0; i--) {
|
|
965
|
+ Block b = blocks.get(i);
|
|
966
|
+ // 每次获取最新段落列表和锚点,避免索引失效
|
|
967
|
+ paragraphs = document.getParagraphs();
|
|
968
|
+ if (insertPos >= paragraphs.size()) {
|
|
969
|
+ XWPFParagraph tail = document.createParagraph();
|
|
970
|
+ paragraphs = document.getParagraphs();
|
|
971
|
+ insertPos = paragraphs.indexOf(tail);
|
|
972
|
+ }
|
|
973
|
+ XmlCursor cursor = paragraphs.get(insertPos).getCTP().newCursor();
|
|
974
|
+
|
|
975
|
+ switch (b.type) {
|
|
976
|
+ case "h4": {
|
|
977
|
+ XWPFParagraph p = document.insertNewParagraph(cursor);
|
|
978
|
+ XWPFRun run = p.createRun();
|
|
979
|
+ int num = (b.h4Index != null ? b.h4Index : 1);
|
|
980
|
+ run.setText("6.1.1." + num + " " + b.text);
|
|
981
|
+ run.setBold(true);
|
|
982
|
+ run.setFontSize(14);
|
|
983
|
+ break;
|
|
984
|
+ }
|
|
985
|
+ case "list1": {
|
|
986
|
+ XWPFParagraph p = document.insertNewParagraph(cursor);
|
|
987
|
+ XWPFRun run = p.createRun();
|
|
988
|
+ int num = (b.list1Index != null ? b.list1Index : 1);
|
|
989
|
+ run.setText(num + ") " + b.text);
|
|
990
|
+ run.setFontSize(12);
|
|
991
|
+ break;
|
|
992
|
+ }
|
|
993
|
+ case "list2": {
|
|
994
|
+ XWPFParagraph p = document.insertNewParagraph(cursor);
|
|
995
|
+ XWPFRun run = p.createRun();
|
|
996
|
+ int num = (b.list2Index != null ? b.list2Index : 1);
|
|
997
|
+ run.setText(" (" + num + ") " + b.text);
|
|
998
|
+ run.setFontSize(12);
|
|
999
|
+ break;
|
|
1000
|
+ }
|
|
1001
|
+ case "imageText": {
|
|
1002
|
+ XWPFParagraph p = document.insertNewParagraph(cursor);
|
|
1003
|
+ XWPFRun run = p.createRun();
|
|
1004
|
+ run.setText(b.text);
|
|
1005
|
+ run.setFontSize(12);
|
|
1006
|
+ run.setItalic(true);
|
|
1007
|
+ run.setColor("808080");
|
|
1008
|
+ break;
|
|
1009
|
+ }
|
|
1010
|
+ case "table": {
|
|
1011
|
+ // 解析 markdown 表格并插入在 cursor 位置
|
|
1012
|
+ List<String[]> tableData = new ArrayList<>();
|
|
1013
|
+ Pattern sep = Pattern.compile("^\\|\\s*(-+\\s*\\|\\s*)*-+\\s*\\|?$");
|
|
1014
|
+ for (String tl : b.table) {
|
|
1015
|
+ if (sep.matcher(tl).matches()) continue;
|
|
1016
|
+ String t = tl.trim();
|
|
1017
|
+ if (t.startsWith("|")) t = t.substring(1);
|
|
1018
|
+ if (t.endsWith("|")) t = t.substring(0, t.length() - 1);
|
|
1019
|
+ String[] cells = t.split("\\|", -1);
|
|
1020
|
+ for (int c = 0; c < cells.length; c++) cells[c] = cells[c].trim();
|
|
1021
|
+ tableData.add(cells);
|
|
1022
|
+ }
|
|
1023
|
+ if (!tableData.isEmpty()) {
|
|
1024
|
+ XWPFTable table = document.insertNewTbl(cursor);
|
|
1025
|
+ table.setStyleID("10");
|
|
1026
|
+ // 初始化行列
|
|
1027
|
+ int rows = tableData.size();
|
|
1028
|
+ int cols = tableData.get(0).length;
|
|
1029
|
+ while (table.getNumberOfRows() < rows) table.createRow();
|
|
1030
|
+ for (int r = 0; r < rows; r++) {
|
|
1031
|
+ XWPFTableRow tr = table.getRow(r);
|
|
1032
|
+ // 首行已存在的列数可能不足,补齐
|
|
1033
|
+ while (tr.getTableCells().size() < cols) tr.addNewTableCell();
|
|
1034
|
+ String[] rowData = tableData.get(r);
|
|
1035
|
+ for (int c = 0; c < cols; c++) {
|
|
1036
|
+ XWPFTableCell cell = tr.getCell(c);
|
|
1037
|
+ cell.removeParagraph(0);
|
|
1038
|
+ XWPFParagraph cp = cell.addParagraph();
|
|
1039
|
+ XWPFRun cr = cp.createRun();
|
|
1040
|
+ cr.setText(rowData[c]);
|
|
1041
|
+ if (r == 0) {
|
|
1042
|
+ cr.setBold(true);
|
|
1043
|
+ cp.setAlignment(ParagraphAlignment.CENTER);
|
|
1044
|
+ } else {
|
|
1045
|
+ cp.setAlignment(ParagraphAlignment.LEFT);
|
|
1046
|
+ }
|
|
1047
|
+ }
|
|
1048
|
+ }
|
|
1049
|
+ table.setWidth("100%");
|
|
1050
|
+ }
|
|
1051
|
+ break;
|
|
1052
|
+ }
|
|
1053
|
+ case "para":
|
|
1054
|
+ default: {
|
|
1055
|
+ XWPFParagraph p = document.insertNewParagraph(cursor);
|
|
1056
|
+ XWPFRun run = p.createRun();
|
|
1057
|
+ if (b.text != null) {
|
|
1058
|
+ run.setText(b.text);
|
|
1059
|
+ }
|
|
1060
|
+ run.setFontSize(12);
|
|
1061
|
+ break;
|
|
1062
|
+ }
|
|
1063
|
+ }
|
|
1064
|
+ }
|
|
1065
|
+ } catch (Exception e) {
|
|
1066
|
+ e.printStackTrace();
|
|
1067
|
+ }
|
|
1068
|
+ }
|
|
1069
|
+
|
|
1070
|
+ /**
|
|
1071
|
+ * 从图片Markdown格式中提取描述文本
|
|
1072
|
+ */
|
|
1073
|
+ private String extractImageDescription(String imageLine) {
|
|
1074
|
+ // 格式: 
|
|
1075
|
+ if (imageLine.startsWith(" && imageLine.contains(")")) {
|
|
1076
|
+ int descStart = imageLine.indexOf('[') + 1;
|
|
1077
|
+ int descEnd = imageLine.indexOf(']');
|
|
1078
|
+ int urlStart = imageLine.indexOf('(') + 1;
|
|
1079
|
+ int urlEnd = imageLine.indexOf(')');
|
|
1080
|
+
|
|
1081
|
+ if (descStart > 0 && descEnd > descStart && urlStart > descEnd && urlEnd > urlStart) {
|
|
1082
|
+ String description = imageLine.substring(descStart, descEnd);
|
|
1083
|
+ String url = imageLine.substring(urlStart, urlEnd);
|
|
1084
|
+ return "图: " + description + " (" + url + ")";
|
|
1085
|
+ }
|
|
1086
|
+ }
|
|
1087
|
+ return imageLine; // 如果格式不正确,返回原文本
|
|
1088
|
+ }
|
|
1089
|
+
|
860
|
1090
|
}
|
|
1091
|
+
|
|
1092
|
+
|