|
@@ -24,11 +24,10 @@ import io.milvus.param.collection.LoadCollectionParam;
|
24
|
24
|
import io.milvus.param.collection.ReleaseCollectionParam;
|
25
|
25
|
import io.milvus.param.dml.SearchParam;
|
26
|
26
|
import io.milvus.response.SearchResultsWrapper;
|
27
|
|
-import org.apache.poi.hwpf.HWPFDocument;
|
|
27
|
+import org.apache.poi.extractor.POITextExtractor;
|
|
28
|
+import org.apache.poi.extractor.ExtractorFactory;
|
28
|
29
|
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
|
29
|
|
-import org.apache.poi.xwpf.usermodel.XWPFDocument;
|
30
|
|
-import org.apache.poi.xwpf.usermodel.XWPFParagraph;
|
31
|
|
-import org.apache.poi.xwpf.usermodel.XWPFRun;
|
|
30
|
+import org.apache.poi.xwpf.usermodel.*;
|
32
|
31
|
import org.noear.solon.Solon;
|
33
|
32
|
import org.noear.solon.ai.annotation.ToolMapping;
|
34
|
33
|
import org.noear.solon.ai.chat.ChatModel;
|
|
@@ -65,9 +64,13 @@ public class McpServiceImpl implements IMcpService {
|
65
|
64
|
@Param(description = "章节名称") String title,
|
66
|
65
|
@Param(description = "技术文件地址") String templatePath) throws IOException
|
67
|
66
|
{
|
68
|
|
- title = String.join(",", extractSubTitles( "/upload/agent/template/technical.docx", title));
|
69
|
|
- List<JSONObject> contexts = retrieveFromMilvus(milvusClient, embeddingModel, collectionName, title, 10);
|
70
|
|
- return generateAnswerWithDocumentAndCollection(embeddingModel, agentName, templatePath, title, contexts, "http://192.168.28.188:8000/v1/chat/completions");
|
|
67
|
+ try {
|
|
68
|
+ title = String.join(",", extractSubTitles( "/upload/agent/template/technical.docx", title));
|
|
69
|
+ List<JSONObject> contexts = retrieveFromMilvus(milvusClient, embeddingModel, collectionName, title, 10);
|
|
70
|
+ return generateAnswerWithDocumentAndCollection(embeddingModel, agentName, templatePath, title, contexts, "http://192.168.28.188:8000/v1/chat/completions");
|
|
71
|
+ } catch (IOException e) {
|
|
72
|
+ throw new RuntimeException(e);
|
|
73
|
+ }
|
71
|
74
|
}
|
72
|
75
|
|
73
|
76
|
/**
|
|
@@ -91,7 +94,12 @@ public class McpServiceImpl implements IMcpService {
|
91
|
94
|
*/
|
92
|
95
|
public AssistantMessage generateAnswerWithDocumentAndCollection(EmbeddingModel embeddingModel, String agentName, String templatePath, String question, List<JSONObject> contexts, String llmServiceUrl) throws IOException {
|
93
|
96
|
StringBuilder sb = new StringBuilder("招标文件内容:\n\n");
|
94
|
|
- File profilePath = new File(templatePath.replace("/dev-api/profile", Solon.cfg().getProperty("cmc.profile")).replace("_" + agentName, ""));
|
|
97
|
+ String filename = templatePath.replace("/dev-api/profile", Solon.cfg().getProperty("cmc.profile")).replace("_" + agentName, "");
|
|
98
|
+ File profilePath = new File(filename);
|
|
99
|
+ if (!profilePath.exists()) {
|
|
100
|
+ filename = filename.replace(".docx", ".doc");
|
|
101
|
+ profilePath = new File(filename);
|
|
102
|
+ }
|
95
|
103
|
List<TextSegment> segments = splitDocument(profilePath);
|
96
|
104
|
List<Embedding> embeddings = embeddingModel.embedAll(segments).content();
|
97
|
105
|
InMemoryEmbeddingStore<TextSegment> embeddingStore = new InMemoryEmbeddingStore<>();
|
|
@@ -131,7 +139,7 @@ public class McpServiceImpl implements IMcpService {
|
131
|
139
|
|
132
|
140
|
ChatResponse response = chatModel.prompt(chatSession).call();
|
133
|
141
|
String content = response.lastChoice().getMessage().getContent() + "\n\n" +
|
134
|
|
- "招标文件分析完成,章节内容已写入【<a href='" + templatePath.replace("/dev-api", "") + "'> 技术文件" + "</a>】,请查阅";
|
|
142
|
+ "招标文件分析完成,章节内容已写入【<a href='" + templatePath + "'> 技术文件" + "</a>】,请查阅";
|
135
|
143
|
String absolutePath = templatePath.replace("/dev-api/profile", Solon.cfg().getProperty("cmc.profile"));
|
136
|
144
|
writeContent(response.lastChoice().getMessage().getContent(), question, absolutePath);
|
137
|
145
|
return ChatMessage.ofAssistant(content);
|
|
@@ -144,47 +152,60 @@ public class McpServiceImpl implements IMcpService {
|
144
|
152
|
public void writeContent(String content, String question, String absolutePath) throws IOException {
|
145
|
153
|
File file = new File(absolutePath);
|
146
|
154
|
FileInputStream fileInputStream = new FileInputStream(file);
|
147
|
|
- XWPFDocument document = new XWPFDocument(fileInputStream);
|
148
|
|
- String[] contentLines = content.split("\n");
|
149
|
|
- Map<String, String> map = new HashMap<>();
|
150
|
|
- String[] titles = question.split(",");
|
151
|
|
- for (int i = 0; i < titles.length; i ++) {
|
152
|
|
- int startIndex = Arrays.asList(contentLines).indexOf(titles[i]);
|
153
|
|
- StringBuilder text = new StringBuilder("");
|
154
|
|
- if (i < titles.length - 1) {
|
155
|
|
- int endIndex = Arrays.asList(contentLines).indexOf(titles[i + 1]);
|
156
|
|
- for (int c = startIndex + 1; c < endIndex; c++)
|
157
|
|
- text.append(contentLines[c]);
|
158
|
|
- }
|
159
|
|
- else {
|
160
|
|
- if (startIndex + 1 < contentLines.length)
|
161
|
|
- for (int c = startIndex + 1; c < contentLines.length; c++)
|
|
155
|
+ try (XWPFDocument document = new XWPFDocument(fileInputStream)) {
|
|
156
|
+ String[] contentLines = content.split("\n");
|
|
157
|
+ Map<String, String> map = new HashMap<>();
|
|
158
|
+ String[] titles = question.split(",");
|
|
159
|
+ for (int i = 0; i < titles.length; i++) {
|
|
160
|
+ int startIndex = Arrays.asList(contentLines).indexOf(titles[i]);
|
|
161
|
+ StringBuilder text = new StringBuilder("");
|
|
162
|
+ if (i < titles.length - 1) {
|
|
163
|
+ int endIndex = Arrays.asList(contentLines).indexOf(titles[i + 1]);
|
|
164
|
+ for (int c = startIndex + 1; c < endIndex; c++)
|
162
|
165
|
text.append(contentLines[c]);
|
|
166
|
+ } else {
|
|
167
|
+ if (startIndex + 1 < contentLines.length)
|
|
168
|
+ for (int c = startIndex + 1; c < contentLines.length; c++)
|
|
169
|
+ text.append(contentLines[c]);
|
|
170
|
+ }
|
|
171
|
+ map.put(titles[i], text.toString());
|
163
|
172
|
}
|
164
|
|
- map.put(titles[i], text.toString());
|
165
|
|
- }
|
166
|
|
- int count = 0;
|
167
|
|
- for (int i = 0; i < document.getParagraphs().size(); i++) {
|
168
|
|
- XWPFParagraph paragraph = document.getParagraphs().get(i);
|
169
|
|
- for (String title : titles) {
|
170
|
|
- if (paragraph.getText().equals(title)) {
|
171
|
|
- int pos = document.getBodyElements().indexOf(paragraph) + 1;
|
172
|
|
- XWPFParagraph contentParagraph = document.createParagraph();
|
173
|
|
- contentParagraph.setStyle("1");
|
174
|
|
- XWPFRun run = contentParagraph.createRun();
|
175
|
|
- run.setText(map.get(title));
|
176
|
|
- document.setParagraph(contentParagraph, pos);
|
177
|
|
- count++;
|
|
173
|
+ int count = 0;
|
|
174
|
+ int position = 0;
|
|
175
|
+ List<Integer> positions = new ArrayList<>();
|
|
176
|
+ List<String> contents = new ArrayList<>();
|
|
177
|
+ for (int i = 0; i < document.getBodyElements().size(); i++) {
|
|
178
|
+ IBodyElement element = document.getBodyElements().get(i);
|
|
179
|
+ if (element instanceof XWPFParagraph) {
|
|
180
|
+ XWPFParagraph paragraph = (XWPFParagraph) element;
|
|
181
|
+ for (String title : titles) {
|
|
182
|
+ if (paragraph.getText().equals(title)) {
|
|
183
|
+ int pos = position + 1;
|
|
184
|
+ positions.add(pos);
|
|
185
|
+ contents.add(map.get(title));
|
|
186
|
+ count++;
|
|
187
|
+ }
|
|
188
|
+ }
|
|
189
|
+ position++;
|
|
190
|
+ if (count == titles.length)
|
|
191
|
+ break;
|
178
|
192
|
}
|
|
193
|
+ else if (element instanceof XWPFTable) {
|
|
194
|
+ XWPFTable table = (XWPFTable) element;
|
|
195
|
+ position += table.getNumberOfRows();
|
|
196
|
+ }
|
|
197
|
+ }
|
|
198
|
+ for (int i = positions.size() - 1; i >= 0; i--) {
|
|
199
|
+ XWPFParagraph contentParagraph = document.createParagraph();
|
|
200
|
+ contentParagraph.setStyle("1");
|
|
201
|
+ document.setParagraph(contentParagraph, positions.get(i));
|
|
202
|
+ XWPFRun run = contentParagraph.createRun();
|
|
203
|
+ run.setText(contents.get(i));
|
|
204
|
+ }
|
|
205
|
+ try (FileOutputStream out = new FileOutputStream(absolutePath)) {
|
|
206
|
+ document.write(out);
|
179
|
207
|
}
|
180
|
|
- if (count == titles.length)
|
181
|
|
- break;
|
182
|
208
|
}
|
183
|
|
- FileOutputStream out = new FileOutputStream(absolutePath);
|
184
|
|
- document.write(out);
|
185
|
|
- // 关闭文档
|
186
|
|
- out.close();
|
187
|
|
- document.close();
|
188
|
209
|
}
|
189
|
210
|
|
190
|
211
|
/**
|
|
@@ -195,25 +216,26 @@ public class McpServiceImpl implements IMcpService {
|
195
|
216
|
boolean inTargetSection = false;
|
196
|
217
|
filename = Solon.cfg().getProperty("cmc.profile") + filename;
|
197
|
218
|
InputStream fileInputStream = new FileInputStream(filename);
|
198
|
|
- XWPFDocument document = new XWPFDocument(fileInputStream);
|
199
|
|
- for (XWPFParagraph paragraph : document.getParagraphs()) {
|
200
|
|
- String text = paragraph.getText().trim();
|
201
|
|
- if (paragraph.getStyle() != null) {
|
202
|
|
- // 判断主标题
|
203
|
|
- if (paragraph.getStyle().equals("3") &&
|
204
|
|
- text.contains(question)) {
|
205
|
|
- inTargetSection = true;
|
206
|
|
- continue;
|
207
|
|
- }
|
208
|
|
-
|
209
|
|
- // 如果已经在目标节中,收集标题3级别的子标题
|
210
|
|
- if (inTargetSection) {
|
211
|
|
- if (paragraph.getStyle().equals("4")) {
|
212
|
|
- subTitles.add(text);
|
|
219
|
+ try (XWPFDocument document = new XWPFDocument(fileInputStream)) {
|
|
220
|
+ for (XWPFParagraph paragraph : document.getParagraphs()) {
|
|
221
|
+ String text = paragraph.getText().trim();
|
|
222
|
+ if (paragraph.getStyle() != null) {
|
|
223
|
+ // 判断主标题
|
|
224
|
+ if (paragraph.getStyle().equals("3") &&
|
|
225
|
+ text.contains(question)) {
|
|
226
|
+ inTargetSection = true;
|
|
227
|
+ continue;
|
213
|
228
|
}
|
214
|
|
- // 遇到下一个Heading1则退出
|
215
|
|
- else if (paragraph.getStyle().equals("3")) {
|
216
|
|
- break;
|
|
229
|
+
|
|
230
|
+ // 如果已经在目标节中,收集标题3级别的子标题
|
|
231
|
+ if (inTargetSection) {
|
|
232
|
+ if (paragraph.getStyle().equals("4")) {
|
|
233
|
+ subTitles.add(text);
|
|
234
|
+ }
|
|
235
|
+ // 遇到下一个Heading1则退出
|
|
236
|
+ else if (paragraph.getStyle().equals("3")) {
|
|
237
|
+ break;
|
|
238
|
+ }
|
217
|
239
|
}
|
218
|
240
|
}
|
219
|
241
|
}
|
|
@@ -271,15 +293,14 @@ public class McpServiceImpl implements IMcpService {
|
271
|
293
|
Document document;
|
272
|
294
|
InputStream fileInputStream = new FileInputStream(transferFile);
|
273
|
295
|
String filename = transferFile.getName().toLowerCase();
|
274
|
|
- if (filename.endsWith(".doc")) {
|
275
|
|
- HWPFDocument doc = new HWPFDocument(fileInputStream);
|
276
|
|
- document = Document.from(doc.getDocumentText());
|
277
|
|
- }
|
278
|
|
- else if (filename.endsWith(".docx")) {
|
279
|
|
- XWPFDocument docx = new XWPFDocument(fileInputStream);
|
280
|
|
- XWPFWordExtractor extractor = new XWPFWordExtractor(docx);
|
281
|
|
- String text = extractor.getText();
|
282
|
|
- document = Document.from(text);
|
|
296
|
+ if (filename.endsWith(".doc") || filename.endsWith(".docx")) {
|
|
297
|
+ try (POITextExtractor extractor = ExtractorFactory.createExtractor(fileInputStream)) {
|
|
298
|
+ String text = extractor.getText();
|
|
299
|
+ document = Document.from(text);
|
|
300
|
+ }
|
|
301
|
+ catch (IOException e) {
|
|
302
|
+ throw new RuntimeException(e);
|
|
303
|
+ }
|
283
|
304
|
}
|
284
|
305
|
else if (filename.endsWith(".pdf")) {
|
285
|
306
|
document = new ApachePdfBoxDocumentParser().parse(fileInputStream);
|