Explorar el Código

写入章节大纲

lamphua hace 1 mes
padre
commit
d32a53463f

+ 1
- 1
llm-back/pom.xml Ver fichero

@@ -27,7 +27,7 @@
27 27
         <fastjson.version>2.0.53</fastjson.version>
28 28
         <oshi.version>6.6.5</oshi.version>
29 29
         <commons.io.version>2.13.0</commons.io.version>
30
-        <poi.version>4.1.2</poi.version>
30
+        <poi.version>5.2.5</poi.version>
31 31
         <velocity.version>2.3</velocity.version>
32 32
         <jwt.version>0.9.1</jwt.version>
33 33
         <!-- override dependency version -->

+ 40
- 37
llm-back/ruoyi-agent/src/main/java/com/ruoyi/agent/service/impl/McpServiceImpl.java Ver fichero

@@ -26,8 +26,8 @@ import io.milvus.param.dml.SearchParam;
26 26
 import io.milvus.response.SearchResultsWrapper;
27 27
 import org.apache.poi.extractor.POITextExtractor;
28 28
 import org.apache.poi.extractor.ExtractorFactory;
29
-import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
30 29
 import org.apache.poi.xwpf.usermodel.*;
30
+import org.apache.xmlbeans.XmlCursor;
31 31
 import org.noear.solon.Solon;
32 32
 import org.noear.solon.ai.annotation.ToolMapping;
33 33
 import org.noear.solon.ai.chat.ChatModel;
@@ -66,8 +66,9 @@ public class McpServiceImpl implements IMcpService {
66 66
     {
67 67
             try {
68 68
                 title = String.join(",", extractSubTitles( "/upload/agent/template/technical.docx", title));
69
-                List<JSONObject> contexts = retrieveFromMilvus(milvusClient, embeddingModel, collectionName, title, 10);
70
-                return generateAnswerWithDocumentAndCollection(embeddingModel, agentName, templatePath, title, contexts, "http://192.168.28.188:8000/v1/chat/completions");
69
+//                List<JSONObject> contexts = retrieveFromMilvus(milvusClient, embeddingModel, collectionName, title, 10);
70
+//                return generateAnswerWithDocumentAndCollection(embeddingModel, agentName, templatePath, title, contexts, "http://192.168.28.188:8000/v1/chat/completions");
71
+                return generateAnswerWithDocumentAndCollection(embeddingModel, agentName, templatePath, title, new ArrayList<>(), "http://192.168.28.188:8000/v1/chat/completions");
71 72
             } catch (IOException e) {
72 73
                 throw new RuntimeException(e);
73 74
             }
@@ -107,9 +108,11 @@ public class McpServiceImpl implements IMcpService {
107 108
         Embedding queryEmbedding = embeddingModel.embed(question).content();
108 109
         EmbeddingSearchRequest embeddingSearchRequest = EmbeddingSearchRequest.builder()
109 110
                 .queryEmbedding(queryEmbedding)
110
-                .maxResults(3)
111
+                .minScore(0.7)
111 112
                 .build();
112
-        for (EmbeddingMatch embeddingMatch : embeddingStore.search(embeddingSearchRequest).matches()) {
113
+        List<EmbeddingMatch<TextSegment>> results = embeddingStore.search(embeddingSearchRequest).matches();
114
+        results.sort(Comparator.comparingDouble(EmbeddingMatch<TextSegment>::score).reversed());
115
+        for (EmbeddingMatch<TextSegment> embeddingMatch : results) {
113 116
             String requests = embeddingMatch.embedded().toString();
114 117
             sb.append(requests).append("\n\n");
115 118
 
@@ -156,52 +159,52 @@ public class McpServiceImpl implements IMcpService {
156 159
             String[] contentLines = content.split("\n");
157 160
             Map<String, String> map = new HashMap<>();
158 161
             String[] titles = question.split(",");
162
+
159 163
             for (int i = 0; i < titles.length; i++) {
160 164
                 int startIndex = Arrays.asList(contentLines).indexOf(titles[i]);
161
-                StringBuilder text = new StringBuilder("");
162
-                if (i < titles.length - 1) {
163
-                    int endIndex = Arrays.asList(contentLines).indexOf(titles[i + 1]);
164
-                    for (int c = startIndex + 1; c < endIndex; c++)
165
-                        text.append(contentLines[c]);
166
-                } else {
167
-                    if (startIndex + 1 < contentLines.length)
168
-                        for (int c = startIndex + 1; c < contentLines.length; c++)
169
-                            text.append(contentLines[c]);
165
+                StringBuilder text = new StringBuilder();
166
+                if (startIndex >= 0) {
167
+                    if (i < titles.length - 1) {
168
+                        int endIndex = Arrays.asList(contentLines).indexOf(titles[i + 1]);
169
+                        for (int c = startIndex + 1; c < endIndex; c++) {
170
+                            text.append(contentLines[c]).append("\n\n");
171
+                        }
172
+                    } else {
173
+                        if (startIndex + 1 < contentLines.length) {
174
+                            for (int c = startIndex + 1; c < contentLines.length; c++) {
175
+                                text.append(contentLines[c]).append("\n\n");
176
+                            }
177
+                        }
178
+                    }
170 179
                 }
180
+                else
181
+                    text.append(content);
171 182
                 map.put(titles[i], text.toString());
172 183
             }
173
-            int count = 0;
174
-            int position = 0;
184
+
175 185
             List<Integer> positions = new ArrayList<>();
176 186
             List<String> contents = new ArrayList<>();
177
-            for (int i = 0; i < document.getBodyElements().size(); i++) {
178
-                IBodyElement element = document.getBodyElements().get(i);
179
-                if (element instanceof XWPFParagraph) {
180
-                    XWPFParagraph paragraph = (XWPFParagraph) element;
181
-                    for (String title : titles) {
182
-                        if (paragraph.getText().equals(title)) {
183
-                            int pos = position + 1;
184
-                            positions.add(pos);
185
-                            contents.add(map.get(title));
186
-                            count++;
187
-                        }
187
+            List<XWPFParagraph> paragraphs = document.getParagraphs();
188
+
189
+            for (int i = 0; i < paragraphs.size(); i++) {
190
+                XWPFParagraph paragraph = paragraphs.get(i);
191
+                for (String title : titles) {
192
+                    if (paragraph.getText().contains(title)) {
193
+                        positions.add(i);
194
+                        contents.add(map.get(title));
188 195
                     }
189
-                    position++;
190
-                    if (count == titles.length)
191
-                        break;
192
-                }
193
-                else if (element instanceof XWPFTable) {
194
-                    XWPFTable table = (XWPFTable) element;
195
-                    position += table.getNumberOfRows();
196 196
                 }
197 197
             }
198
+
198 199
             for (int i = positions.size() - 1; i >= 0; i--) {
199
-                XWPFParagraph contentParagraph = document.createParagraph();
200
+                int insertPos = positions.get(i) + 1;
201
+                XmlCursor xmlCursor = paragraphs.get(insertPos).getCTP().newCursor();
202
+                XWPFParagraph contentParagraph = document.insertNewParagraph(xmlCursor);
200 203
                 contentParagraph.setStyle("1");
201
-                document.setParagraph(contentParagraph, positions.get(i));
202 204
                 XWPFRun run = contentParagraph.createRun();
203 205
                 run.setText(contents.get(i));
204 206
             }
207
+
205 208
             try (FileOutputStream out = new FileOutputStream(absolutePath)) {
206 209
                 document.write(out);
207 210
             }
@@ -311,7 +314,7 @@ public class McpServiceImpl implements IMcpService {
311 314
         else {
312 315
             throw new UnsupportedOperationException("不支持文件类型: " + filename);
313 316
         }
314
-        DocumentByParagraphSplitter splitter = new DocumentByParagraphSplitter(1000,200);
317
+        DocumentByParagraphSplitter splitter = new DocumentByParagraphSplitter(300,50);
315 318
         return splitter.split(document);
316 319
     }
317 320
 

+ 5
- 3
llm-back/ruoyi-llm/src/main/java/com/ruoyi/web/llm/service/impl/LangChainMilvusServiceImpl.java Ver fichero

@@ -207,9 +207,11 @@ public class LangChainMilvusServiceImpl implements ILangChainMilvusService
207 207
             Embedding queryEmbedding = embeddingModel.embed(question).content();
208 208
             EmbeddingSearchRequest embeddingSearchRequest = EmbeddingSearchRequest.builder()
209 209
                     .queryEmbedding(queryEmbedding)
210
-                    .maxResults(1)
210
+                    .minScore(0.7)
211 211
                     .build();
212
-            for (EmbeddingMatch embeddingMatch : embeddingStore.search(embeddingSearchRequest).matches()) {
212
+            List<EmbeddingMatch<TextSegment>> results = embeddingStore.search(embeddingSearchRequest).matches();
213
+            results.sort(Comparator.comparingDouble(EmbeddingMatch<TextSegment>::score).reversed());
214
+            for (EmbeddingMatch<TextSegment> embeddingMatch : results) {
213 215
                 String contexts = embeddingMatch.embedded().toString();
214 216
                 sb.append("文件").append(": ")
215 217
                         .append(document.getPath()).append("\n\n")
@@ -366,7 +368,7 @@ public class LangChainMilvusServiceImpl implements ILangChainMilvusService
366 368
         else {
367 369
             throw new UnsupportedOperationException("不支持文件类型: " + filename);
368 370
         }
369
-        DocumentByParagraphSplitter splitter = new DocumentByParagraphSplitter(1000,200);
371
+        DocumentByParagraphSplitter splitter = new DocumentByParagraphSplitter(300,50);
370 372
         return splitter.split(document);
371 373
     }
372 374
 

+ 38
- 0
llm-back/ruoyi-system/pom.xml Ver fichero

@@ -23,6 +23,44 @@
23 23
             <artifactId>ruoyi-common</artifactId>
24 24
         </dependency>
25 25
 
26
+        <dependency>
27
+            <groupId>dev.langchain4j</groupId>
28
+            <artifactId>langchain4j-core</artifactId>
29
+            <version>0.35.0</version>
30
+        </dependency>
31
+
32
+        <dependency>
33
+            <groupId>dev.langchain4j</groupId>
34
+            <artifactId>langchain4j</artifactId>
35
+            <version>0.35.0</version>
36
+        </dependency>
37
+
38
+        <dependency>
39
+            <groupId>dev.langchain4j</groupId>
40
+            <artifactId>langchain4j-document-parser-apache-pdfbox</artifactId>
41
+            <version>0.35.0</version>
42
+            <scope>compile</scope>
43
+        </dependency>
44
+
45
+        <dependency>
46
+            <groupId>dev.langchain4j</groupId>
47
+            <artifactId>langchain4j-embeddings-bge-small-zh-v15</artifactId>
48
+            <version>0.35.0</version>
49
+            <scope>compile</scope>
50
+        </dependency>
51
+
52
+        <dependency>
53
+            <groupId>org.noear</groupId>
54
+            <artifactId>solon-ai-mcp</artifactId>
55
+            <version>3.3.1</version>
56
+            <exclusions>
57
+                <exclusion>
58
+                    <groupId>org.slf4j</groupId>
59
+                    <artifactId>*</artifactId>
60
+                </exclusion>
61
+            </exclusions>
62
+        </dependency>
63
+
26 64
     </dependencies>
27 65
 
28 66
 </project>

+ 151
- 23
llm-back/ruoyi-system/src/main/java/com/ruoyi/llm/service/impl/CmcAgentServiceImpl.java Ver fichero

@@ -1,8 +1,7 @@
1 1
 package com.ruoyi.llm.service.impl;
2 2
 
3 3
 import java.io.*;
4
-import java.util.Date;
5
-import java.util.List;
4
+import java.util.*;
6 5
 
7 6
 import com.alibaba.fastjson2.JSONObject;
8 7
 import com.ruoyi.common.config.RuoYiConfig;
@@ -13,7 +12,28 @@ import com.ruoyi.llm.domain.CmcChat;
13 12
 import com.ruoyi.llm.domain.CmcDocument;
14 13
 import com.ruoyi.llm.mapper.CmcChatMapper;
15 14
 import com.ruoyi.llm.mapper.CmcDocumentMapper;
15
+import dev.langchain4j.data.document.Document;
16
+import dev.langchain4j.data.document.parser.TextDocumentParser;
17
+import dev.langchain4j.data.document.parser.apache.pdfbox.ApachePdfBoxDocumentParser;
18
+import dev.langchain4j.data.document.splitter.DocumentByParagraphSplitter;
19
+import dev.langchain4j.data.embedding.Embedding;
20
+import dev.langchain4j.data.segment.TextSegment;
21
+import dev.langchain4j.model.embedding.EmbeddingModel;
22
+import dev.langchain4j.model.embedding.onnx.bgesmallzhv15.BgeSmallZhV15EmbeddingModel;
23
+import dev.langchain4j.store.embedding.EmbeddingMatch;
24
+import dev.langchain4j.store.embedding.EmbeddingSearchRequest;
25
+import dev.langchain4j.store.embedding.inmemory.InMemoryEmbeddingStore;
26
+import org.apache.poi.extractor.ExtractorFactory;
27
+import org.apache.poi.extractor.POITextExtractor;
28
+import org.apache.poi.xwpf.usermodel.BreakType;
16 29
 import org.apache.poi.xwpf.usermodel.XWPFDocument;
30
+import org.apache.poi.xwpf.usermodel.XWPFParagraph;
31
+import org.apache.poi.xwpf.usermodel.XWPFRun;
32
+import org.noear.solon.ai.chat.ChatModel;
33
+import org.noear.solon.ai.chat.ChatResponse;
34
+import org.noear.solon.ai.chat.ChatSession;
35
+import org.noear.solon.ai.chat.ChatSessionDefault;
36
+import org.noear.solon.ai.chat.message.ChatMessage;
17 37
 import org.springframework.beans.factory.annotation.Autowired;
18 38
 import org.springframework.stereotype.Service;
19 39
 import com.ruoyi.llm.mapper.CmcAgentMapper;
@@ -39,6 +59,10 @@ public class CmcAgentServiceImpl implements ICmcAgentService
39 59
     @Autowired
40 60
     private CmcChatMapper cmcChatMapper;
41 61
 
62
+    private static final EmbeddingModel embeddingModel = new BgeSmallZhV15EmbeddingModel();
63
+
64
+    private static final String llmServiceUrl = "http://192.168.28.188:8000/v1/chat/completions";
65
+
42 66
     /**
43 67
      * 查询智能体
44 68
      * 
@@ -85,33 +109,34 @@ public class CmcAgentServiceImpl implements ICmcAgentService
85 109
      */
86 110
     @Override
87 111
     public JSONObject uploadDocument(MultipartFile file, String agentName) throws IOException {
88
-        File profilePath = new File( RuoYiConfig.getProfile() + "/upload/agent/" + agentName);
112
+        String prefixPath = "/upload/agent/" + agentName;
113
+        File profilePath = new File( RuoYiConfig.getProfile() + prefixPath);
89 114
         if (!profilePath.exists())
90 115
             profilePath.mkdirs();
91
-        File transferFile = new File(profilePath + "/" + file.getOriginalFilename());
92
-        if (!transferFile.exists())
93
-            file.transferTo(transferFile);
94 116
         String chatId = new SnowFlake().generateId();
95 117
         JSONObject jsonObject = new JSONObject();
96 118
         jsonObject.put("chatId", chatId);
97
-        String[] filenameSplit = file.getOriginalFilename().split("\\.");
98
-        String outputFilename = "/upload/agent/" + agentName + "/" + file.getOriginalFilename()
99
-                .replace(filenameSplit[filenameSplit.length - 2], filenameSplit[filenameSplit.length - 2] + "_" + agentName);
100
-        if (file.getOriginalFilename().endsWith(".doc"))
101
-            outputFilename = outputFilename.replace(".doc", ".docx");
119
+        File transferFile = new File(profilePath + "/" + file.getOriginalFilename());
120
+        if (!transferFile.exists())
121
+            file.transferTo(transferFile);
102 122
         CmcDocument cmcDocument = new CmcDocument();
103 123
         cmcDocument.setDocumentId(new SnowFlake().generateId());
104 124
         cmcDocument.setChatId(chatId);
105
-        cmcDocument.setPath(outputFilename);
125
+        cmcDocument.setPath(prefixPath + "/" + file.getOriginalFilename());
106 126
         cmcDocumentMapper.insertCmcDocument(cmcDocument);
107 127
         String message = "";
108 128
         if (agentName.contains("技术")) {
109 129
             CmcChat cmcChat = new CmcChat();
110 130
             cmcChat.setChatId(jsonObject.getString("chatId"));
111 131
             cmcChat.setInputTime(new Date());
112
-            cmcChat.setInput("招标文件地址:" + "/upload/agent/" + agentName + "/" + file.getOriginalFilename());
132
+            cmcChat.setInput("招标文件地址:" + prefixPath + "/" + file.getOriginalFilename());
113 133
             cmcChat.setUserId(SecurityUtils.getUserId());
114 134
             cmcChatMapper.insertCmcChat(cmcChat);
135
+            String[] filenameSplit = file.getOriginalFilename().split("\\.");
136
+            String outputFilename = prefixPath + "/" + file.getOriginalFilename()
137
+                    .replace(filenameSplit[filenameSplit.length - 2], filenameSplit[filenameSplit.length - 2] + "_" + agentName);
138
+            if (file.getOriginalFilename().endsWith(".doc"))
139
+                outputFilename = outputFilename.replace(".doc", ".docx");
115 140
             InputStream fileInputStream = new FileInputStream(RuoYiConfig.getProfile() + "/upload/agent/template/technical.docx");
116 141
             try (XWPFDocument doc = new XWPFDocument(fileInputStream)) {
117 142
                 // 保存文档到本地文件系统
@@ -119,18 +144,12 @@ public class CmcAgentServiceImpl implements ICmcAgentService
119 144
                     doc.write(out);
120 145
                 }
121 146
             }
147
+            String chapters = generateAnswerWithDocument(embeddingModel, profilePath + "/" + file.getOriginalFilename(),
148
+                    RuoYiConfig.getProfile() + outputFilename, "工作大纲", llmServiceUrl);
122 149
             message = "好的,我已经收到您上传的招标文件,我将给您提供技术文件模板,您可点击进行预览:" +
123 150
                     "【<a href='/profile" + outputFilename + "'> 模版 " + "</a>】\n\n" +
124
-                    "技术文件涉及多个章节:\n" +
125
-                    "1 项目条件及特性\n" +
126
-                    "2 项目重点和难点的分析与对策\n" +
127
-                    "3 项目策划、技术路线、工作方法、流程及措施\n" +
128
-                    "4 资源配置\n" +
129
-                    "5 进度计划及保证措施\n" +
130
-                    "6 质量保证措施\n" +
131
-                    "7 服务与技术支持\n" +
132
-                    "8 职业健康、安全生产及环保水保措施\n\n" +
133
-                    "请问您需要哪些章节撰写的帮助?";
151
+                    chapters + "\n\n" +
152
+                    "请问您需要哪个章节撰写的帮助?";
134 153
         }
135 154
         jsonObject.put("assistantMessage", message);
136 155
         return jsonObject;
@@ -203,4 +222,113 @@ public class CmcAgentServiceImpl implements ICmcAgentService
203 222
     {
204 223
         return cmcAgentMapper.deleteCmcAgentByAgentId(agentId);
205 224
     }
225
+
226
+
227
+    /**
228
+     * 调用LLM生成回答
229
+     */
230
+    public String generateAnswerWithDocument(EmbeddingModel embeddingModel, String uploadFilePath, String templatePath, String question, String llmServiceUrl) throws IOException {
231
+        StringBuilder sb = new StringBuilder("招标文件内容:\n\n");
232
+        File profilePath = new File(uploadFilePath);
233
+        List<TextSegment> segments = splitDocument(profilePath);
234
+        List<Embedding> embeddings = embeddingModel.embedAll(segments).content();
235
+        InMemoryEmbeddingStore<TextSegment> embeddingStore = new InMemoryEmbeddingStore<>();
236
+        embeddingStore.addAll(embeddings, segments);
237
+        Embedding queryEmbedding = embeddingModel.embed(question).content();
238
+        EmbeddingSearchRequest embeddingSearchRequest = EmbeddingSearchRequest.builder()
239
+                .queryEmbedding(queryEmbedding)
240
+                .minScore(0.7)
241
+                .build();
242
+        List<EmbeddingMatch<TextSegment>> results = embeddingStore.search(embeddingSearchRequest).matches();
243
+        results.sort(Comparator.comparingDouble(EmbeddingMatch<TextSegment>::score).reversed());
244
+        for (EmbeddingMatch<TextSegment> embeddingMatch : results) {
245
+            String requests = embeddingMatch.embedded().toString();
246
+            sb.append(requests).append("\n\n");
247
+        }
248
+        sb.append("请根据上述招标文件内容,严格按以下格式列出").append(question).append(":\n")
249
+                .append("6.1 XX\n" +
250
+                "6.2 XX\n" +
251
+                "6.3 XX");
252
+        return generateAnswer(sb.toString(), question, templatePath, llmServiceUrl);
253
+    }
254
+
255
+    /**
256
+     * 调用LLM生成回答
257
+     * @return the text content of the last choice in the chat model's response
258
+     */
259
+    public String generateAnswer(String prompt, String question, String templatePath, String llmServiceUrl) throws IOException {
260
+        ChatSession chatSession = new ChatSessionDefault();
261
+        ChatModel chatModel = ChatModel.of(llmServiceUrl)
262
+                .provider("openai")
263
+                .model("Qwen2.5-1.5B-Instruct")
264
+                .build();
265
+
266
+        chatSession.addMessage(ChatMessage.ofUser(prompt));
267
+
268
+        ChatResponse response = chatModel.prompt(chatSession).call();
269
+        String content = response.lastChoice().getMessage().getContent();
270
+        writeContent(content, templatePath);
271
+        return content;
272
+    }
273
+
274
+    /**
275
+     * 写入章节大纲
276
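+     * @param content LLM output; for each line containing "6." the second space-separated token is appended to the document at templatePath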
277
+     */
278
+    public void writeContent(String content, String templatePath) throws IOException {
279
+        List<String> chapters = new ArrayList<>();
280
+        String[] contentLines = content.split("\n");
281
+        for (String line : contentLines) {
282
+            if (line.contains("6."))
283
+                chapters.add(line.replace("*", "").replace("#", "").split(" ")[1]);
284
+        }
285
+        File file = new File(templatePath);
286
+        FileInputStream fileInputStream = new FileInputStream(file);
287
+        try (XWPFDocument document = new XWPFDocument(fileInputStream)) {
288
+            for (int i = 0; i < chapters.size(); i++) {
289
+                XWPFParagraph contentParagraph = document.createParagraph();
290
+                contentParagraph.setStyle("3");
291
+                XWPFRun run = contentParagraph.createRun();
292
+                run.setText(chapters.get(i));
293
+                run.addBreak();
294
+                if (i < chapters.size() - 1)
295
+                    run.addBreak(BreakType.PAGE);
296
+            }
297
+
298
+            try (FileOutputStream out = new FileOutputStream(templatePath)) {
299
+                document.write(out);
300
+            }
301
+        }
302
+    }
303
+
304
+    /**
305
+     * Load a document (.doc/.docx/.pdf/.txt) and split it into text segments
306
+     */
307
+    private List<TextSegment> splitDocument(File transferFile) throws IOException {
308
+        // 加载文档
309
+        Document document;
310
+        InputStream fileInputStream = new FileInputStream(transferFile);
311
+        String filename = transferFile.getName().toLowerCase();
312
+        if (filename.endsWith(".doc") || filename.endsWith(".docx")) {
313
+            try (POITextExtractor extractor = ExtractorFactory.createExtractor(fileInputStream)) {
314
+                String text = extractor.getText();
315
+                document = Document.from(text);
316
+            }
317
+            catch (IOException e) {
318
+                throw new RuntimeException(e);
319
+            }
320
+        }
321
+        else if (filename.endsWith(".pdf")) {
322
+            document = new ApachePdfBoxDocumentParser().parse(fileInputStream);
323
+        }
324
+        else if (filename.endsWith(".txt")) {
325
+            document = new TextDocumentParser().parse(fileInputStream);
326
+        }
327
+        else {
328
+            throw new UnsupportedOperationException("不支持文件类型: " + filename);
329
+        }
330
+        DocumentByParagraphSplitter splitter = new DocumentByParagraphSplitter(300,50);
331
+        return splitter.split(document);
332
+    }
333
+
206 334
 }

Loading…
Cancelar
Guardar