|
@@ -1,26 +1,74 @@
|
1
|
1
|
package com.ruoyi.agent.service.impl;
|
2
|
2
|
|
|
3
|
+import com.alibaba.fastjson2.JSONObject;
|
3
|
4
|
import com.ruoyi.agent.service.IMcpService;
|
|
5
|
+import dev.langchain4j.data.document.Document;
|
|
6
|
+import dev.langchain4j.data.document.parser.apache.pdfbox.ApachePdfBoxDocumentParser;
|
|
7
|
+import dev.langchain4j.data.document.splitter.DocumentByParagraphSplitter;
|
|
8
|
+import dev.langchain4j.data.embedding.Embedding;
|
|
9
|
+import dev.langchain4j.data.segment.TextSegment;
|
|
10
|
+import dev.langchain4j.model.embedding.EmbeddingModel;
|
|
11
|
+import dev.langchain4j.model.embedding.onnx.bgesmallzhv15.BgeSmallZhV15EmbeddingModel;
|
|
12
|
+import dev.langchain4j.store.embedding.EmbeddingMatch;
|
|
13
|
+import dev.langchain4j.store.embedding.EmbeddingSearchRequest;
|
|
14
|
+import dev.langchain4j.store.embedding.inmemory.InMemoryEmbeddingStore;
|
|
15
|
+import io.milvus.client.MilvusClient;
|
|
16
|
+import io.milvus.client.MilvusServiceClient;
|
|
17
|
+import io.milvus.grpc.SearchResults;
|
|
18
|
+import io.milvus.param.ConnectParam;
|
|
19
|
+import io.milvus.param.MetricType;
|
|
20
|
+import io.milvus.param.R;
|
|
21
|
+import io.milvus.param.RpcStatus;
|
|
22
|
+import io.milvus.param.collection.LoadCollectionParam;
|
|
23
|
+import io.milvus.param.collection.ReleaseCollectionParam;
|
|
24
|
+import io.milvus.param.dml.SearchParam;
|
|
25
|
+import io.milvus.response.SearchResultsWrapper;
|
|
26
|
+import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
|
4
|
27
|
import org.apache.poi.xwpf.usermodel.XWPFDocument;
|
5
|
28
|
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
|
|
29
|
+import org.noear.solon.Solon;
|
6
|
30
|
import org.noear.solon.ai.annotation.PromptMapping;
|
7
|
|
-import org.noear.solon.ai.annotation.ResourceMapping;
|
8
|
31
|
import org.noear.solon.ai.annotation.ToolMapping;
|
|
32
|
+import org.noear.solon.ai.chat.ChatModel;
|
|
33
|
+import org.noear.solon.ai.chat.ChatResponse;
|
|
34
|
+import org.noear.solon.ai.chat.ChatSession;
|
|
35
|
+import org.noear.solon.ai.chat.ChatSessionDefault;
|
|
36
|
+import org.noear.solon.ai.chat.message.AssistantMessage;
|
9
|
37
|
import org.noear.solon.ai.chat.message.ChatMessage;
|
10
|
38
|
import org.noear.solon.ai.mcp.server.annotation.McpServerEndpoint;
|
11
|
39
|
import org.noear.solon.annotation.Param;
|
|
40
|
+import org.reactivestreams.Publisher;
|
12
|
41
|
import org.springframework.stereotype.Service;
|
|
42
|
+import reactor.core.publisher.Flux;
|
13
|
43
|
|
14
|
|
-import java.io.FileInputStream;
|
15
|
|
-import java.io.FileOutputStream;
|
16
|
|
-import java.io.IOException;
|
17
|
|
-import java.util.Collection;
|
18
|
|
-import java.util.Collections;
|
|
44
|
+import java.io.*;
|
|
45
|
+import java.util.*;
|
|
46
|
+import java.util.stream.Collectors;
|
19
|
47
|
|
20
|
48
|
@Service
|
21
|
49
|
@McpServerEndpoint(sseEndpoint = "/llm/mcp/sse")
|
22
|
50
|
public class McpServiceImpl implements IMcpService {
|
23
|
51
|
|
|
52
|
+ private static final EmbeddingModel embeddingModel = new BgeSmallZhV15EmbeddingModel();
|
|
53
|
+
|
|
54
|
+ private static final MilvusServiceClient milvusClient = new MilvusServiceClient(
|
|
55
|
+ ConnectParam.newBuilder()
|
|
56
|
+ .withHost("192.168.28.188")
|
|
57
|
+ .withPort(19530)
|
|
58
|
+ .build());
|
|
59
|
+ /**
|
|
60
|
+ * 调用LLM+RAG(外部文件+知识库)生成回答
|
|
61
|
+ */
|
|
62
|
+ @ToolMapping(description = "章节撰写")
|
|
63
|
+ public AssistantMessage writeParagraph(@Param(description = "智能体名称") String collectionName,
|
|
64
|
+ @Param(description = "章节名称") String title,
|
|
65
|
+ @Param(description = "招标文件地址") String templatePath) throws IOException
|
|
66
|
+ {
|
|
67
|
+ title = String.join(",", extractSubTitles( "/upload/agent/template/technical.docx", title));
|
|
68
|
+ List<JSONObject> requests = retrieveFromMilvus(milvusClient, embeddingModel, collectionName, title, 10);
|
|
69
|
+ return generateAnswerWithDocumentAndCollection(embeddingModel, templatePath, title, requests, "http://192.168.28.188:8000/v1/chat/completions");
|
|
70
|
+ }
|
|
71
|
+
|
24
|
72
|
@ToolMapping(description = "梳理招标文件服务要求")
|
25
|
73
|
public String getBidRequest(@Param(description = "文件路径") String document) throws IOException {
|
26
|
74
|
try (XWPFDocument doc = new XWPFDocument(new FileInputStream(document))) {
|
|
@@ -66,4 +114,168 @@ public class McpServiceImpl implements IMcpService {
|
66
|
114
|
"回答以下问题:'" + part + "\n")
|
67
|
115
|
);
|
68
|
116
|
}
|
|
117
|
+
|
|
118
|
+ /**
|
|
119
|
+ * 从Milvus检索相关文档
|
|
120
|
+ * @return
|
|
121
|
+ */
|
|
122
|
+ public List<JSONObject> retrieveFromMilvus(MilvusClient milvusClient, EmbeddingModel embeddingModel, String collectionName, String query, int topK) {
|
|
123
|
+ SearchResultsWrapper wrapper = retrieve(milvusClient, embeddingModel, collectionName, query, topK);
|
|
124
|
+ return wrapper.getRowRecords(0).stream()
|
|
125
|
+ .map(record -> {
|
|
126
|
+ JSONObject result = new JSONObject();
|
|
127
|
+ result.put("file_name", record.get("file_name"));
|
|
128
|
+ result.put("content", record.get("content"));
|
|
129
|
+ return result;
|
|
130
|
+ })
|
|
131
|
+ .collect(Collectors.toList());
|
|
132
|
+ }
|
|
133
|
+
|
|
134
|
+ /**
|
|
135
|
+ * 调用LLM生成回答
|
|
136
|
+ */
|
|
137
|
+ public AssistantMessage generateAnswerWithDocumentAndCollection(EmbeddingModel embeddingModel, String templatePath, String question, List<JSONObject> contexts, String llmServiceUrl) throws IOException {
|
|
138
|
+ StringBuilder sb = new StringBuilder("招标文件内容:\n\n");
|
|
139
|
+ File profilePath = new File(Solon.cfg().getProperty("cmc.profile") + templatePath);
|
|
140
|
+ List<TextSegment> segments = splitDocument(profilePath);
|
|
141
|
+ List<Embedding> embeddings = embeddingModel.embedAll(segments).content();
|
|
142
|
+ InMemoryEmbeddingStore<TextSegment> embeddingStore = new InMemoryEmbeddingStore<>();
|
|
143
|
+ embeddingStore.addAll(embeddings, segments);
|
|
144
|
+ Embedding queryEmbedding = embeddingModel.embed(question).content();
|
|
145
|
+ EmbeddingSearchRequest embeddingSearchRequest = EmbeddingSearchRequest.builder()
|
|
146
|
+ .queryEmbedding(queryEmbedding)
|
|
147
|
+ .maxResults(3)
|
|
148
|
+ .build();
|
|
149
|
+ for (EmbeddingMatch embeddingMatch : embeddingStore.search(embeddingSearchRequest).matches()) {
|
|
150
|
+ String requests = embeddingMatch.embedded().toString();
|
|
151
|
+ sb.append(requests).append("\n\n");
|
|
152
|
+
|
|
153
|
+ }
|
|
154
|
+ sb.append("针对本项目招标文件内容,补全以下章节部分:\n\n").append(question);
|
|
155
|
+// for (JSONObject context : contexts) {
|
|
156
|
+// sb.append("文件").append(": ")
|
|
157
|
+// .append(context.getString("file_name")).append("\n\n")
|
|
158
|
+// .append("段落格式").append(": ")
|
|
159
|
+// .append(context.getString("content")).append("\n\n");
|
|
160
|
+// }
|
|
161
|
+ return generateAnswer(sb.toString(), llmServiceUrl);
|
|
162
|
+ }
|
|
163
|
+
|
|
164
|
+ /**
|
|
165
|
+ * 调用LLM生成回答
|
|
166
|
+ * @return
|
|
167
|
+ */
|
|
168
|
+ public AssistantMessage generateAnswer(String prompt, String llmServiceUrl) throws IOException {
|
|
169
|
+ ChatSession chatSession = new ChatSessionDefault();
|
|
170
|
+ ChatModel chatModel = ChatModel.of(llmServiceUrl)
|
|
171
|
+ .provider("openai")
|
|
172
|
+ .model("Qwen2.5-1.5B-Instruct")
|
|
173
|
+ .build();
|
|
174
|
+
|
|
175
|
+ chatSession.addMessage(ChatMessage.ofUser(prompt));
|
|
176
|
+
|
|
177
|
+ ChatResponse response = chatModel.prompt(chatSession).call();
|
|
178
|
+ return response.lastChoice().getMessage();
|
|
179
|
+ }
|
|
180
|
+
|
|
181
|
+ /**
|
|
182
|
+ * 获取二级标题下三级标题列表
|
|
183
|
+ */
|
|
184
|
+ public List<String> extractSubTitles(String filename, String question) throws IOException {
|
|
185
|
+ List<String> subTitles = new ArrayList<>();
|
|
186
|
+ boolean inTargetSection = false;
|
|
187
|
+ filename = Solon.cfg().getProperty("cmc.profile") + filename;
|
|
188
|
+ InputStream fileInputStream = new FileInputStream(filename);
|
|
189
|
+ XWPFDocument document = new XWPFDocument(fileInputStream);
|
|
190
|
+ for (XWPFParagraph paragraph : document.getParagraphs()) {
|
|
191
|
+ String text = paragraph.getText().trim();
|
|
192
|
+ if (paragraph.getStyle() != null) {
|
|
193
|
+ // 判断主标题
|
|
194
|
+ if (paragraph.getStyle().equals("3") &&
|
|
195
|
+ text.contains(question)) {
|
|
196
|
+ inTargetSection = true;
|
|
197
|
+ continue;
|
|
198
|
+ }
|
|
199
|
+
|
|
200
|
+ // 如果已经在目标节中,收集标题3级别的子标题
|
|
201
|
+ if (inTargetSection) {
|
|
202
|
+ if (paragraph.getStyle().equals("4")) {
|
|
203
|
+ subTitles.add(text);
|
|
204
|
+ }
|
|
205
|
+ // 遇到下一个Heading1则退出
|
|
206
|
+ else if (paragraph.getStyle().equals("3")) {
|
|
207
|
+ break;
|
|
208
|
+ }
|
|
209
|
+ }
|
|
210
|
+ }
|
|
211
|
+ }
|
|
212
|
+ if (subTitles.size() == 0)
|
|
213
|
+ subTitles.add(question);
|
|
214
|
+ return subTitles;
|
|
215
|
+ }
|
|
216
|
+
|
|
217
|
+ /**
|
|
218
|
+ * 检索知识库
|
|
219
|
+ */
|
|
220
|
+ private SearchResultsWrapper retrieve(MilvusClient milvusClient, EmbeddingModel embeddingModel, String collectionName, String query, int topK) {
|
|
221
|
+ List<List<Float>> queryVector = Collections.singletonList(embeddingModel.embed(query).content().vectorAsList());
|
|
222
|
+
|
|
223
|
+ // 加载集合
|
|
224
|
+ LoadCollectionParam loadParam = LoadCollectionParam.newBuilder()
|
|
225
|
+ .withCollectionName(collectionName)
|
|
226
|
+ .build();
|
|
227
|
+
|
|
228
|
+ R<RpcStatus> loadResponse = milvusClient.loadCollection(loadParam);
|
|
229
|
+ if (loadResponse.getStatus() != R.Status.Success.getCode()) {
|
|
230
|
+ System.err.println("加载Collection失败: " + loadResponse.getMessage());
|
|
231
|
+ milvusClient.close();
|
|
232
|
+ }
|
|
233
|
+
|
|
234
|
+ // 构建SearchParam
|
|
235
|
+ SearchParam searchParam = SearchParam.newBuilder()
|
|
236
|
+ .withCollectionName(collectionName)
|
|
237
|
+ .withVectors(queryVector)
|
|
238
|
+ .withTopK(topK)
|
|
239
|
+ .withOutFields(Arrays.asList("file_name", "file_type", "content"))
|
|
240
|
+ .withVectorFieldName("embedding")
|
|
241
|
+ .withMetricType(MetricType.COSINE)
|
|
242
|
+ .withParams("{\"nprobe\": 8}")
|
|
243
|
+ .build();
|
|
244
|
+
|
|
245
|
+ R<SearchResults> response = milvusClient.search(searchParam);
|
|
246
|
+ SearchResultsWrapper wrapper = new SearchResultsWrapper(response.getData().getResults());
|
|
247
|
+
|
|
248
|
+ // 释放集合
|
|
249
|
+ ReleaseCollectionParam param = ReleaseCollectionParam.newBuilder()
|
|
250
|
+ .withCollectionName(collectionName)
|
|
251
|
+ .build();
|
|
252
|
+ milvusClient.releaseCollection(param);
|
|
253
|
+
|
|
254
|
+ return wrapper;
|
|
255
|
+ }
|
|
256
|
+
|
|
257
|
+ /**
|
|
258
|
+ * 检索知识库
|
|
259
|
+ */
|
|
260
|
+ private List<TextSegment> splitDocument(File transferFile) throws IOException {
|
|
261
|
+ // 加载文档
|
|
262
|
+ Document document;
|
|
263
|
+ InputStream fileInputStream = new FileInputStream(transferFile);
|
|
264
|
+ String filename = transferFile.getName().toLowerCase();
|
|
265
|
+ if (filename.endsWith(".docx")) {
|
|
266
|
+ XWPFDocument docx = new XWPFDocument(fileInputStream);
|
|
267
|
+ XWPFWordExtractor extractor = new XWPFWordExtractor(docx);
|
|
268
|
+ String text = extractor.getText();
|
|
269
|
+ document = Document.from(text);
|
|
270
|
+ }
|
|
271
|
+ else if (filename.endsWith(".pdf")) {
|
|
272
|
+ document = new ApachePdfBoxDocumentParser().parse(fileInputStream);
|
|
273
|
+ }
|
|
274
|
+ else {
|
|
275
|
+ throw new UnsupportedOperationException("不支持文件类型: " + filename);
|
|
276
|
+ }
|
|
277
|
+ DocumentByParagraphSplitter splitter = new DocumentByParagraphSplitter(1000,200);
|
|
278
|
+ return splitter.split(document);
|
|
279
|
+ }
|
|
280
|
+
|
69
|
281
|
}
|