|
@@ -13,6 +13,7 @@ import dev.langchain4j.data.document.splitter.DocumentByParagraphSplitter;
|
13
|
13
|
import dev.langchain4j.data.embedding.Embedding;
|
14
|
14
|
import dev.langchain4j.data.segment.TextSegment;
|
15
|
15
|
import dev.langchain4j.model.embedding.EmbeddingModel;
|
|
16
|
+import dev.langchain4j.store.embedding.EmbeddingMatch;
|
16
|
17
|
import dev.langchain4j.store.embedding.EmbeddingSearchRequest;
|
17
|
18
|
import dev.langchain4j.store.embedding.inmemory.InMemoryEmbeddingStore;
|
18
|
19
|
import io.milvus.client.MilvusClient;
|
|
@@ -28,6 +29,7 @@ import io.milvus.param.dml.SearchParam;
|
28
|
29
|
import io.milvus.response.SearchResultsWrapper;
|
29
|
30
|
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
|
30
|
31
|
import org.apache.poi.xwpf.usermodel.XWPFDocument;
|
|
32
|
+import org.apache.poi.xwpf.usermodel.XWPFParagraph;
|
31
|
33
|
import org.noear.solon.ai.chat.ChatModel;
|
32
|
34
|
import org.noear.solon.ai.chat.ChatResponse;
|
33
|
35
|
import org.noear.solon.ai.chat.ChatSession;
|
|
@@ -195,7 +197,7 @@ public class LangChainMilvusServiceImpl implements ILangChainMilvusService
|
195
|
197
|
List<CmcDocument> documentList = cmcDocumentService.selectCmcDocumentList(cmcDocument);
|
196
|
198
|
StringBuilder sb = new StringBuilder("问题: " + question + "\n\n").append("根据以下上下文回答问题:\n\n");
|
197
|
199
|
for (CmcDocument document : documentList) {
|
198
|
|
- File profilePath = new File(RuoYiConfig.getProfile() + "/upload/rag/document/" + document.getPath());
|
|
200
|
+ File profilePath = new File(RuoYiConfig.getProfile() + document.getPath());
|
199
|
201
|
List<TextSegment> segments = splitDocument(document.getPath(), profilePath);
|
200
|
202
|
List<Embedding> embeddings = embeddingModel.embedAll(segments).content();
|
201
|
203
|
InMemoryEmbeddingStore<TextSegment> embeddingStore = new InMemoryEmbeddingStore<>();
|
|
@@ -205,11 +207,13 @@ public class LangChainMilvusServiceImpl implements ILangChainMilvusService
|
205
|
207
|
.queryEmbedding(queryEmbedding)
|
206
|
208
|
.maxResults(1)
|
207
|
209
|
.build();
|
208
|
|
- String contexts = embeddingStore.search(embeddingSearchRequest).matches().get(0).embedded().text();
|
209
|
|
- sb.append("文件").append(": ")
|
210
|
|
- .append(document.getPath()).append("\n\n")
|
211
|
|
- .append("上下文").append(": ")
|
212
|
|
- .append(contexts).append("\n\n");
|
|
210
|
+ for (EmbeddingMatch embeddingMatch : embeddingStore.search(embeddingSearchRequest).matches()) {
|
|
211
|
+ String contexts = embeddingMatch.embedded().toString();
|
|
212
|
+ sb.append("文件").append(": ")
|
|
213
|
+ .append(document.getPath()).append("\n\n")
|
|
214
|
+ .append("上下文").append(": ")
|
|
215
|
+ .append(contexts).append("\n\n");
|
|
216
|
+ }
|
213
|
217
|
}
|
214
|
218
|
return generateAnswer(topicId, sb.toString(), llmServiceUrl);
|
215
|
219
|
}
|
|
@@ -218,18 +222,13 @@ public class LangChainMilvusServiceImpl implements ILangChainMilvusService
|
218
|
222
|
* 调用LLM生成回答
|
219
|
223
|
*/
|
220
|
224
|
@Override
|
221
|
|
- public Flux<AssistantMessage> generateAnswerWithDocumentAndCollection(EmbeddingModel embeddingModel, String topicId, String chatId, String question, List<JSONObject> requests, String llmServiceUrl) throws IOException {
|
|
225
|
+ public Flux<AssistantMessage> generateAnswerWithDocumentAndCollection(EmbeddingModel embeddingModel, String topicId, String chatId, String question, List<JSONObject> contexts, String llmServiceUrl) throws IOException {
|
|
226
|
+ StringBuilder sb = new StringBuilder("招标文件内容:\n\n");
|
222
|
227
|
CmcDocument cmcDocument = new CmcDocument();
|
223
|
228
|
cmcDocument.setChatId(chatId);
|
224
|
229
|
List<CmcDocument> documentList = cmcDocumentService.selectCmcDocumentList(cmcDocument);
|
225
|
|
- StringBuilder sb = new StringBuilder("问题: " + question + "\n\n").append("根据以下要求:\n\n");
|
226
|
|
- for (JSONObject request : requests) {
|
227
|
|
- sb.append("要求").append(": ")
|
228
|
|
- .append(request.getString("content")).append("\n\n");
|
229
|
|
- }
|
230
|
|
- sb.append("参考以下文件上下文回答问题:\n\n");
|
231
|
230
|
for (CmcDocument document : documentList) {
|
232
|
|
- File profilePath = new File(RuoYiConfig.getProfile() + "/upload/rag/document/" + document.getPath());
|
|
231
|
+ File profilePath = new File(RuoYiConfig.getProfile() + document.getPath());
|
233
|
232
|
List<TextSegment> segments = splitDocument(document.getPath(), profilePath);
|
234
|
233
|
List<Embedding> embeddings = embeddingModel.embedAll(segments).content();
|
235
|
234
|
InMemoryEmbeddingStore<TextSegment> embeddingStore = new InMemoryEmbeddingStore<>();
|
|
@@ -237,17 +236,60 @@ public class LangChainMilvusServiceImpl implements ILangChainMilvusService
|
237
|
236
|
Embedding queryEmbedding = embeddingModel.embed(question).content();
|
238
|
237
|
EmbeddingSearchRequest embeddingSearchRequest = EmbeddingSearchRequest.builder()
|
239
|
238
|
.queryEmbedding(queryEmbedding)
|
240
|
|
- .maxResults(1)
|
|
239
|
+ .maxResults(3)
|
241
|
240
|
.build();
|
242
|
|
- String contexts = embeddingStore.search(embeddingSearchRequest).matches().get(0).embedded().text();
|
243
|
|
- sb.append("文件").append(": ")
|
244
|
|
- .append(document.getPath()).append("\n\n")
|
245
|
|
- .append("上下文").append(": ")
|
246
|
|
- .append(contexts).append("\n\n");
|
|
241
|
+ for (EmbeddingMatch embeddingMatch : embeddingStore.search(embeddingSearchRequest).matches()) {
|
|
242
|
+ String requests = embeddingMatch.embedded().toString();
|
|
243
|
+ sb.append(requests).append("\n\n");
|
|
244
|
+ }
|
247
|
245
|
}
|
|
246
|
+ sb.append("针对本项目招标文件内容,补全以下章节部分:\n\n").append(question);
|
|
247
|
+// for (JSONObject context : contexts) {
|
|
248
|
+// sb.append("文件").append(": ")
|
|
249
|
+// .append(context.getString("file_name")).append("\n\n")
|
|
250
|
+// .append("段落格式").append(": ")
|
|
251
|
+// .append(context.getString("content")).append("\n\n");
|
|
252
|
+// }
|
248
|
253
|
return generateAnswer(topicId, sb.toString(), llmServiceUrl);
|
249
|
254
|
}
|
250
|
255
|
|
|
256
|
+ /**
|
|
257
|
+ * 获取二级标题下三级标题列表
|
|
258
|
+ */
|
|
259
|
+ @Override
|
|
260
|
+ public List<String> extractSubTitles(String filename, String question) throws IOException {
|
|
261
|
+ List<String> subTitles = new ArrayList<>();
|
|
262
|
+ boolean inTargetSection = false;
|
|
263
|
+
|
|
264
|
+ InputStream fileInputStream = new FileInputStream(filename);
|
|
265
|
+ XWPFDocument document = new XWPFDocument(fileInputStream);
|
|
266
|
+ for (XWPFParagraph paragraph : document.getParagraphs()) {
|
|
267
|
+ String text = paragraph.getText().trim();
|
|
268
|
+ if (paragraph.getStyle() != null) {
|
|
269
|
+ // 判断主标题
|
|
270
|
+ if (paragraph.getStyle().equals("3") &&
|
|
271
|
+ text.contains(question)) {
|
|
272
|
+ inTargetSection = true;
|
|
273
|
+ continue;
|
|
274
|
+ }
|
|
275
|
+
|
|
276
|
+ // 如果已经在目标节中,收集标题3级别的子标题
|
|
277
|
+ if (inTargetSection) {
|
|
278
|
+ if (paragraph.getStyle().equals("4")) {
|
|
279
|
+ subTitles.add(text);
|
|
280
|
+ }
|
|
281
|
+ // 遇到下一个Heading1则退出
|
|
282
|
+ else if (paragraph.getStyle().equals("3")) {
|
|
283
|
+ break;
|
|
284
|
+ }
|
|
285
|
+ }
|
|
286
|
+ }
|
|
287
|
+ }
|
|
288
|
+ if (subTitles.size() == 0)
|
|
289
|
+ subTitles.add(question);
|
|
290
|
+ return subTitles;
|
|
291
|
+ }
|
|
292
|
+
|
251
|
293
|
/**
|
252
|
294
|
* 检索知识库
|
253
|
295
|
*/
|
|
@@ -288,14 +330,13 @@ public class LangChainMilvusServiceImpl implements ILangChainMilvusService
|
288
|
330
|
return wrapper;
|
289
|
331
|
}
|
290
|
332
|
|
291
|
|
-
|
292
|
333
|
/**
|
293
|
334
|
* 检索知识库
|
294
|
335
|
*/
|
295
|
|
- private List<TextSegment> splitDocument(String filename, File profilePath) throws IOException {
|
|
336
|
+ private List<TextSegment> splitDocument(String filename, File transferFile) throws IOException {
|
296
|
337
|
// 加载文档
|
297
|
338
|
Document document;
|
298
|
|
- InputStream fileInputStream = new FileInputStream(profilePath);
|
|
339
|
+ InputStream fileInputStream = new FileInputStream(transferFile);
|
299
|
340
|
filename = filename.toLowerCase();
|
300
|
341
|
if (filename.endsWith(".docx")) {
|
301
|
342
|
XWPFDocument docx = new XWPFDocument(fileInputStream);
|
|
@@ -312,4 +353,5 @@ public class LangChainMilvusServiceImpl implements ILangChainMilvusService
|
312
|
353
|
DocumentByParagraphSplitter splitter = new DocumentByParagraphSplitter(1000,200);
|
313
|
354
|
return splitter.split(document);
|
314
|
355
|
}
|
|
356
|
+
|
315
|
357
|
}
|