|
@@ -1,20 +1,18 @@
|
1
|
|
-/*
|
2
|
|
- * @Author: ysh
|
3
|
|
- * @Date: 2025-07-08 15:10:42
|
4
|
|
- * @LastEditors: Please set LastEditors
|
5
|
|
- * @LastEditTime: 2025-07-21 15:42:26
|
6
|
|
- */
|
7
|
1
|
package com.ruoyi.web.llm.service.impl;
|
8
|
2
|
|
9
|
3
|
import com.alibaba.fastjson2.JSONObject;
|
10
|
4
|
import com.ruoyi.common.config.RuoYiConfig;
|
|
5
|
+import com.ruoyi.llm.domain.CmcChat;
|
|
6
|
+import com.ruoyi.llm.service.ICmcChatService;
|
11
|
7
|
import com.ruoyi.web.llm.service.ILangChainMilvusService;
|
12
|
8
|
import dev.langchain4j.data.document.Document;
|
13
|
9
|
import dev.langchain4j.data.document.parser.apache.pdfbox.ApachePdfBoxDocumentParser;
|
14
|
10
|
import dev.langchain4j.data.document.splitter.DocumentByParagraphSplitter;
|
|
11
|
+import dev.langchain4j.data.embedding.Embedding;
|
15
|
12
|
import dev.langchain4j.data.segment.TextSegment;
|
16
|
13
|
import dev.langchain4j.model.embedding.EmbeddingModel;
|
17
|
|
-
|
|
14
|
+import dev.langchain4j.store.embedding.EmbeddingSearchRequest;
|
|
15
|
+import dev.langchain4j.store.embedding.inmemory.InMemoryEmbeddingStore;
|
18
|
16
|
import io.milvus.client.MilvusClient;
|
19
|
17
|
import io.milvus.grpc.MutationResult;
|
20
|
18
|
import io.milvus.grpc.SearchResults;
|
|
@@ -30,53 +28,40 @@ import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
|
30
|
28
|
import org.apache.poi.xwpf.usermodel.XWPFDocument;
|
31
|
29
|
import org.noear.solon.ai.chat.ChatModel;
|
32
|
30
|
import org.noear.solon.ai.chat.ChatResponse;
|
|
31
|
+import org.noear.solon.ai.chat.ChatSession;
|
|
32
|
+import org.noear.solon.ai.chat.ChatSessionDefault;
|
33
|
33
|
import org.noear.solon.ai.chat.message.AssistantMessage;
|
|
34
|
+import org.noear.solon.ai.chat.message.ChatMessage;
|
34
|
35
|
import org.reactivestreams.Publisher;
|
|
36
|
+import org.springframework.beans.factory.annotation.Autowired;
|
35
|
37
|
import org.springframework.stereotype.Service;
|
36
|
38
|
import org.springframework.web.multipart.MultipartFile;
|
37
|
39
|
import reactor.core.publisher.Flux;
|
38
|
40
|
|
39
|
|
-import java.io.File;
|
40
|
|
-import java.io.FileInputStream;
|
41
|
|
-import java.io.IOException;
|
42
|
|
-import java.io.InputStream;
|
|
41
|
+import java.io.*;
|
43
|
42
|
import java.util.*;
|
44
|
43
|
import java.util.stream.Collectors;
|
45
|
44
|
|
46
|
45
|
@Service
|
47
|
46
|
public class LangChainMilvusServiceImpl implements ILangChainMilvusService
|
48
|
47
|
{
|
|
48
|
+ @Autowired
|
|
49
|
+ private ICmcChatService cmcChatService;
|
|
50
|
+
|
49
|
51
|
/**
|
50
|
52
|
* 导入知识库文件
|
51
|
53
|
*/
|
52
|
54
|
@Override
|
53
|
55
|
public R<MutationResult> insertLangchainEmbeddingDocument(MilvusClient milvusClient, MultipartFile file, String collectionName, EmbeddingModel embeddingModel) throws IOException
|
54
|
56
|
{
|
55
|
|
- File profilePath = new File( RuoYiConfig.getProfile() + "/upload/knowledge/" + collectionName);
|
|
57
|
+ File profilePath = new File( RuoYiConfig.getProfile() + "/upload/rag/knowledge/" + collectionName);
|
56
|
58
|
if (!profilePath.exists())
|
57
|
59
|
profilePath.mkdirs();
|
58
|
60
|
File transferFile = new File( profilePath + File.separator + file.getOriginalFilename());
|
59
|
61
|
if (!transferFile.exists()) {
|
60
|
62
|
file.transferTo(transferFile);
|
61
|
63
|
}
|
62
|
|
- // 加载文档
|
63
|
|
- Document document;
|
64
|
|
- InputStream fileInputStream = new FileInputStream(transferFile);
|
65
|
|
- String filename = file.getOriginalFilename().toLowerCase();
|
66
|
|
- if (filename.endsWith(".docx")) {
|
67
|
|
- XWPFDocument docx = new XWPFDocument(fileInputStream);
|
68
|
|
- XWPFWordExtractor extractor = new XWPFWordExtractor(docx);
|
69
|
|
- String text = extractor.getText();
|
70
|
|
- document = Document.from(text);
|
71
|
|
- }
|
72
|
|
- else if (filename.endsWith(".pdf")) {
|
73
|
|
- document = new ApachePdfBoxDocumentParser().parse(fileInputStream);
|
74
|
|
- }
|
75
|
|
- else {
|
76
|
|
- throw new UnsupportedOperationException("不支持文件类型: " + filename);
|
77
|
|
- }
|
78
|
|
- DocumentByParagraphSplitter splitter = new DocumentByParagraphSplitter(1000,200);;
|
79
|
|
- List<TextSegment> segments = splitter.split(document);
|
|
64
|
+ List<TextSegment> segments = splitDocument(file.getOriginalFilename(), transferFile);
|
80
|
65
|
|
81
|
66
|
// 提取文本和生成嵌入
|
82
|
67
|
List<String> fileNames = new ArrayList<>();
|
|
@@ -144,16 +129,45 @@ public class LangChainMilvusServiceImpl implements ILangChainMilvusService
|
144
|
129
|
return result;
|
145
|
130
|
})
|
146
|
131
|
.collect(Collectors.toList());
|
147
|
|
- wrapperList.removeIf(jsonObject -> jsonObject.getDouble("distance") < 0.75);
|
|
132
|
+ wrapperList.removeIf(jsonObject -> jsonObject.getDouble("distance") < 0.7);
|
148
|
133
|
return wrapperList;
|
149
|
134
|
}
|
150
|
135
|
|
|
136
|
+ /**
|
|
137
|
+ * 调用LLM生成回答
|
|
138
|
+ * @return
|
|
139
|
+ */
|
|
140
|
+ @Override
|
|
141
|
+ public Flux<AssistantMessage> generateAnswer(String topicId, String prompt, String llmServiceUrl) {
|
|
142
|
+ ChatSession chatSession = new ChatSessionDefault(topicId);
|
|
143
|
+ ChatModel chatModel = ChatModel.of(llmServiceUrl)
|
|
144
|
+ .provider("openai")
|
|
145
|
+ .model("Qwen2.5-1.5B-Instruct")
|
|
146
|
+ .apiKey("1")
|
|
147
|
+ .build();
|
|
148
|
+
|
|
149
|
+ CmcChat cmcChat = new CmcChat();
|
|
150
|
+ cmcChat.setTopicId(topicId);
|
|
151
|
+ List<CmcChat> cmcChatList = cmcChatService.selectCmcChatList(cmcChat);
|
|
152
|
+ for (CmcChat chat : cmcChatList) {
|
|
153
|
+ chatSession.addMessage(ChatMessage.ofUser(chat.getInput()));
|
|
154
|
+ chatSession.addMessage(ChatMessage.ofAssistant(chat.getOutput()));
|
|
155
|
+ }
|
|
156
|
+ chatSession.addMessage(ChatMessage.ofUser(prompt));
|
|
157
|
+
|
|
158
|
+ Publisher<ChatResponse> publisher = chatModel.prompt(chatSession).stream();
|
|
159
|
+ return Flux.from(publisher)
|
|
160
|
+ .map(response -> {
|
|
161
|
+ return response.lastChoice().getMessage();
|
|
162
|
+ });
|
|
163
|
+ }
|
|
164
|
+
|
151
|
165
|
/**
|
152
|
166
|
* 调用LLM+RAG生成回答
|
153
|
167
|
* @return
|
154
|
168
|
*/
|
155
|
169
|
@Override
|
156
|
|
- public Flux<AssistantMessage> generateAnswerWithRag(String question, List<JSONObject> contexts, String llmServiceUrl) {
|
|
170
|
+ public Flux<AssistantMessage> generateAnswerWithCollection(String topicId, String question, List<JSONObject> contexts, String llmServiceUrl) {
|
157
|
171
|
StringBuilder sb = new StringBuilder();
|
158
|
172
|
sb.append("问题: ").append(question).append("\n\n");
|
159
|
173
|
sb.append("根据以下上下文回答问题:\n\n");
|
|
@@ -163,29 +177,38 @@ public class LangChainMilvusServiceImpl implements ILangChainMilvusService
|
163
|
177
|
.append("上下文").append(": ")
|
164
|
178
|
.append(contexts.get(i).getString("content")).append("\n\n");
|
165
|
179
|
}
|
166
|
|
- // 构建带动态参数的URL
|
167
|
|
- return generateAnswer(sb.toString(), llmServiceUrl);
|
|
180
|
+ return generateAnswer(topicId, sb.toString(), llmServiceUrl);
|
168
|
181
|
}
|
169
|
182
|
|
170
|
183
|
/**
|
171
|
184
|
* 调用LLM生成回答
|
172
|
|
- * @return
|
173
|
185
|
*/
|
174
|
186
|
@Override
|
175
|
|
- public Flux<AssistantMessage> generateAnswer(String prompt, String llmServiceUrl) {
|
176
|
|
- ChatModel chatModel = ChatModel.of(llmServiceUrl)
|
177
|
|
- .provider("openai")
|
178
|
|
- .model("Qwen2.5-1.5B-Instruct")
|
179
|
|
- .apiKey("1")
|
180
|
|
- .build();
|
181
|
|
- Publisher<ChatResponse> publisher = chatModel.prompt(prompt).stream();
|
|
187
|
+ public Flux<AssistantMessage> generateAnswerWithDocument(EmbeddingModel embeddingModel, String filename, String topicId, String question, String llmServiceUrl) throws IOException {
|
182
|
188
|
|
183
|
|
- return Flux.from(publisher)
|
184
|
|
- .map(response -> {
|
185
|
|
- return response.lastChoice().getMessage();
|
186
|
|
- });
|
|
189
|
+ File profilePath = new File( RuoYiConfig.getProfile() + "/upload/rag/document/" + filename);
|
|
190
|
+ List<TextSegment> segments = splitDocument(filename, profilePath);
|
|
191
|
+ List<Embedding> embeddings = embeddingModel.embedAll(segments).content();
|
|
192
|
+ InMemoryEmbeddingStore<TextSegment> embeddingStore = new InMemoryEmbeddingStore<>();
|
|
193
|
+ embeddingStore.addAll(embeddings, segments);
|
|
194
|
+ Embedding queryEmbedding = embeddingModel.embed(question).content();
|
|
195
|
+ EmbeddingSearchRequest embeddingSearchRequest = EmbeddingSearchRequest.builder()
|
|
196
|
+ .queryEmbedding(queryEmbedding)
|
|
197
|
+ .maxResults(1)
|
|
198
|
+ .build();
|
|
199
|
+ String contexts = embeddingStore.search(embeddingSearchRequest).matches().get(0).embedded().text();
|
|
200
|
+ String sb = "问题: " + question + "\n\n" +
|
|
201
|
+ "根据以下上下文回答问题:\n\n" +
|
|
202
|
+ "文件" + ": " +
|
|
203
|
+ filename + "\n\n" +
|
|
204
|
+ "上下文" + ": " +
|
|
205
|
+ contexts + "\n\n";
|
|
206
|
+ return generateAnswer(topicId, sb, llmServiceUrl);
|
187
|
207
|
}
|
188
|
208
|
|
|
209
|
+ /**
|
|
210
|
+ * 检索知识库
|
|
211
|
+ */
|
189
|
212
|
private SearchResultsWrapper retrieve(MilvusClient milvusClient, EmbeddingModel embeddingModel, String collectionName, String query, int topK) {
|
190
|
213
|
List<List<Float>> queryVector = Collections.singletonList(embeddingModel.embed(query).content().vectorAsList());
|
191
|
214
|
|
|
@@ -222,4 +245,33 @@ public class LangChainMilvusServiceImpl implements ILangChainMilvusService
|
222
|
245
|
|
223
|
246
|
return wrapper;
|
224
|
247
|
}
|
|
248
|
+
|
|
249
|
+
|
|
250
|
+ /**
|
|
251
|
+ * 检索知识库
|
|
252
|
+ */
|
|
253
|
+ private List<TextSegment> splitDocument(String filename, File profilePath) throws IOException {
|
|
254
|
+ // 加载文档
|
|
255
|
+ Document document;
|
|
256
|
+ InputStream fileInputStream = new FileInputStream(profilePath);
|
|
257
|
+ filename = filename.toLowerCase();
|
|
258
|
+ if (filename.endsWith(".docx")) {
|
|
259
|
+ XWPFDocument docx = new XWPFDocument(fileInputStream);
|
|
260
|
+ XWPFWordExtractor extractor = new XWPFWordExtractor(docx);
|
|
261
|
+ String text = extractor.getText();
|
|
262
|
+ document = Document.from(text);
|
|
263
|
+ }
|
|
264
|
+ else if (filename.endsWith(".pdf")) {
|
|
265
|
+ document = new ApachePdfBoxDocumentParser().parse(fileInputStream);
|
|
266
|
+ }
|
|
267
|
+ else {
|
|
268
|
+ throw new UnsupportedOperationException("不支持文件类型: " + filename);
|
|
269
|
+ }
|
|
270
|
+ DocumentByParagraphSplitter splitter = new DocumentByParagraphSplitter(1000,200);
|
|
271
|
+ return splitter.split(document);
|
|
272
|
+ }
|
|
273
|
/**
 * One-method chat contract: accepts a message string and returns a string.
 * NOTE(review): nothing in the visible part of this class references it; confirm it is still needed.
 */
interface Assistant {
    String chat(String message);
}
|
225
|
277
|
}
|