瀏覽代碼

支持上传多份文件

lamphua 1 周之前
父節點
當前提交
f1fd168fc3

+ 2
- 1
llm-back/ruoyi-llm/src/main/java/com/ruoyi/web/llm/controller/CmcChatController.java 查看文件

@@ -74,7 +74,8 @@ public class CmcChatController extends BaseController
74 74
     @PostMapping
75 75
     public AjaxResult add(@RequestBody CmcChat cmcChat)
76 76
     {
77
-        cmcChat.setChatId(new SnowFlake().generateId());
77
+        if (cmcChat.getChatId() == null)
78
+            cmcChat.setChatId(new SnowFlake().generateId());
78 79
         return toAjax(cmcChatService.insertCmcChat(cmcChat));
79 80
     }
80 81
 

+ 35
- 1
llm-back/ruoyi-llm/src/main/java/com/ruoyi/web/llm/controller/CmcDocumentController.java 查看文件

@@ -1,8 +1,13 @@
1 1
 package com.ruoyi.web.llm.controller;
2 2
 
3
+import java.io.File;
4
+import java.io.IOException;
5
+import java.util.ArrayList;
3 6
 import java.util.List;
4 7
 import javax.servlet.http.HttpServletResponse;
5 8
 
9
+import com.alibaba.fastjson2.JSONObject;
10
+import com.ruoyi.common.config.RuoYiConfig;
6 11
 import com.ruoyi.common.utils.SnowFlake;
7 12
 import org.springframework.beans.factory.annotation.Autowired;
8 13
 import org.springframework.web.bind.annotation.GetMapping;
@@ -21,6 +26,7 @@ import com.ruoyi.llm.domain.CmcDocument;
21 26
 import com.ruoyi.llm.service.ICmcDocumentService;
22 27
 import com.ruoyi.common.utils.poi.ExcelUtil;
23 28
 import com.ruoyi.common.core.page.TableDataInfo;
29
+import org.springframework.web.multipart.MultipartFile;
24 30
 
25 31
 /**
26 32
  * cmc聊天附件Controller
@@ -67,6 +73,35 @@ public class CmcDocumentController extends BaseController
67 73
         return success(cmcDocumentService.selectCmcDocumentByDocumentId(documentId));
68 74
     }
69 75
 
76
+    /**
77
+     * 上传外部文件
78
+     * @return
79
+     */
80
+    @GetMapping("/upload")
81
+    public JSONObject upload(MultipartFile[] fileList) throws IOException {
82
+        File profilePath = new File( RuoYiConfig.getProfile() + "/upload/rag/document" );
83
+        if (!profilePath.exists())
84
+            profilePath.mkdirs();
85
+        String chatId = new SnowFlake().generateId();
86
+        JSONObject jsonObject = new JSONObject();
87
+        jsonObject.put("chatId", chatId);
88
+        List<String> filenames = new ArrayList<>();
89
+        for (MultipartFile file : fileList) {
90
+            File transferFile = new File(profilePath + File.separator + file.getOriginalFilename());
91
+            if (!transferFile.exists()) {
92
+                file.transferTo(transferFile);
93
+            }
94
+            CmcDocument cmcDocument = new CmcDocument();
95
+            cmcDocument.setDocumentId(new SnowFlake().generateId());
96
+            cmcDocument.setChatId(chatId);
97
+            cmcDocument.setPath(file.getOriginalFilename());
98
+            cmcDocumentService.insertCmcDocument(cmcDocument);
99
+            filenames.add(file.getOriginalFilename());
100
+        }
101
+        jsonObject.put("filenames", filenames);
102
+        return jsonObject;
103
+    }
104
+
70 105
     /**
71 106
      * 新增cmc聊天附件
72 107
      */
@@ -74,7 +109,6 @@ public class CmcDocumentController extends BaseController
74 109
     @PostMapping
75 110
     public AjaxResult add(@RequestBody CmcDocument cmcDocument)
76 111
     {
77
-        cmcDocument.setDocumentId(new SnowFlake().generateId());
78 112
         return toAjax(cmcDocumentService.insertCmcDocument(cmcDocument));
79 113
     }
80 114
 

+ 2
- 30
llm-back/ruoyi-llm/src/main/java/com/ruoyi/web/llm/controller/SessionController.java 查看文件

@@ -1,9 +1,6 @@
1 1
 package com.ruoyi.web.llm.controller;
2 2
 
3
-import com.ruoyi.common.config.RuoYiConfig;
4 3
 import com.ruoyi.common.core.controller.BaseController;
5
-import com.ruoyi.llm.domain.CmcDocument;
6
-import com.ruoyi.llm.service.ICmcDocumentService;
7 4
 import com.ruoyi.web.llm.service.ILangChainMilvusService;
8 5
 import dev.langchain4j.model.embedding.EmbeddingModel;
9 6
 import dev.langchain4j.model.embedding.onnx.bgesmallzhv15.BgeSmallZhV15EmbeddingModel;
@@ -12,10 +9,8 @@ import org.springframework.beans.factory.annotation.Autowired;
12 9
 import org.springframework.web.bind.annotation.GetMapping;
13 10
 import org.springframework.web.bind.annotation.RequestMapping;
14 11
 import org.springframework.web.bind.annotation.RestController;
15
-import org.springframework.web.multipart.MultipartFile;
16 12
 import reactor.core.publisher.Flux;
17 13
 
18
-import java.io.File;
19 14
 import java.io.IOException;
20 15
 
21 16
 /**
@@ -31,9 +26,6 @@ public class SessionController extends BaseController
31 26
     @Autowired
32 27
     private ILangChainMilvusService langChainMilvusService;
33 28
 
34
-    @Autowired
35
-    private ICmcDocumentService cmcDocumentService;
36
-
37 29
     private static final EmbeddingModel embeddingModel = new BgeSmallZhV15EmbeddingModel();
38 30
 
39 31
     /**
@@ -44,33 +36,13 @@ public class SessionController extends BaseController
44 36
         return langChainMilvusService.generateAnswer(topicId, question, "http://192.168.28.188:8000/v1/chat/completions");
45 37
     }
46 38
 
47
-    /**
48
-     * 上传外部文件
49
-     * @return
50
-     */
51
-    @GetMapping("/upload")
52
-    public String upload(MultipartFile file) throws IOException {
53
-        File profilePath = new File( RuoYiConfig.getProfile() + "/upload/rag/document" );
54
-        if (!profilePath.exists())
55
-            profilePath.mkdirs();
56
-        File transferFile = new File( profilePath + File.separator + file.getOriginalFilename());
57
-        if (!transferFile.exists()) {
58
-            file.transferTo(transferFile);
59
-        }
60
-        return file.getOriginalFilename();
61
-    }
62
-
63 39
     /**
64 40
      * 调用LLM+RAG(外部文件)生成回答
65 41
      */
66 42
     @GetMapping("/answerWithDocument")
67
-    public Flux<AssistantMessage> answerWithDocument(String filename, String chatId, String topicId, String question) throws IOException
43
+    public Flux<AssistantMessage> answerWithDocument(String chatId, String question) throws IOException
68 44
     {
69
-        CmcDocument cmcDocument = new CmcDocument();
70
-        cmcDocument.setChatId(chatId);
71
-        cmcDocument.setPath(filename);
72
-        cmcDocumentService.insertCmcDocument(cmcDocument);
73
-        return langChainMilvusService.generateAnswerWithDocument(embeddingModel, filename, topicId, question, "http://192.168.28.188:8000/v1/chat/completions");
45
+        return langChainMilvusService.generateAnswerWithDocument(embeddingModel, chatId, question, "http://192.168.28.188:8000/v1/chat/completions");
74 46
     }
75 47
 
76 48
 }

+ 1
- 1
llm-back/ruoyi-llm/src/main/java/com/ruoyi/web/llm/service/ILangChainMilvusService.java 查看文件

@@ -47,6 +47,6 @@ public interface ILangChainMilvusService {
47 47
      * 调用LLM+RAG(外部文件)生成回答
48 48
      * @return
49 49
      */
50
-    public Flux<AssistantMessage> generateAnswerWithDocument(EmbeddingModel embeddingModel, String filename, String topicId, String question, String llmServiceUrl) throws IOException;
50
+    public Flux<AssistantMessage> generateAnswerWithDocument(EmbeddingModel embeddingModel, String chatId, String question, String llmServiceUrl) throws IOException;
51 51
 
52 52
 }

+ 37
- 30
llm-back/ruoyi-llm/src/main/java/com/ruoyi/web/llm/service/impl/LangChainMilvusServiceImpl.java 查看文件

@@ -3,7 +3,9 @@ package com.ruoyi.web.llm.service.impl;
3 3
 import com.alibaba.fastjson2.JSONObject;
4 4
 import com.ruoyi.common.config.RuoYiConfig;
5 5
 import com.ruoyi.llm.domain.CmcChat;
6
+import com.ruoyi.llm.domain.CmcDocument;
6 7
 import com.ruoyi.llm.service.ICmcChatService;
8
+import com.ruoyi.llm.service.ICmcDocumentService;
7 9
 import com.ruoyi.web.llm.service.ILangChainMilvusService;
8 10
 import dev.langchain4j.data.document.Document;
9 11
 import dev.langchain4j.data.document.parser.apache.pdfbox.ApachePdfBoxDocumentParser;
@@ -48,6 +50,9 @@ public class LangChainMilvusServiceImpl implements ILangChainMilvusService
48 50
     @Autowired
49 51
     private ICmcChatService cmcChatService;
50 52
 
53
+    @Autowired
54
+    private ICmcDocumentService cmcDocumentService;
55
+
51 56
     /**
52 57
      * 导入知识库文件
53 58
      */
@@ -146,12 +151,14 @@ public class LangChainMilvusServiceImpl implements ILangChainMilvusService
146 151
                 .apiKey("1")
147 152
                 .build();
148 153
 
149
-        CmcChat cmcChat = new CmcChat();
150
-        cmcChat.setTopicId(topicId);
151
-        List<CmcChat> cmcChatList = cmcChatService.selectCmcChatList(cmcChat);
152
-        for (CmcChat chat : cmcChatList) {
153
-            chatSession.addMessage(ChatMessage.ofUser(chat.getInput()));
154
-            chatSession.addMessage(ChatMessage.ofAssistant(chat.getOutput()));
154
+        if (topicId != null) {
155
+            CmcChat cmcChat = new CmcChat();
156
+            cmcChat.setTopicId(topicId);
157
+            List<CmcChat> cmcChatList = cmcChatService.selectCmcChatList(cmcChat);
158
+            for (CmcChat chat : cmcChatList) {
159
+                chatSession.addMessage(ChatMessage.ofUser(chat.getInput()));
160
+                chatSession.addMessage(ChatMessage.ofAssistant(chat.getOutput()));
161
+            }
155 162
         }
156 163
         chatSession.addMessage(ChatMessage.ofUser(prompt));
157 164
 
@@ -184,26 +191,30 @@ public class LangChainMilvusServiceImpl implements ILangChainMilvusService
184 191
      * 调用LLM生成回答
185 192
      */
186 193
     @Override
187
-    public Flux<AssistantMessage> generateAnswerWithDocument(EmbeddingModel embeddingModel, String filename, String topicId, String question, String llmServiceUrl) throws IOException {
188
-
189
-        File profilePath = new File( RuoYiConfig.getProfile() + "/upload/rag/document/" + filename);
190
-        List<TextSegment> segments = splitDocument(filename, profilePath);
191
-        List<Embedding> embeddings = embeddingModel.embedAll(segments).content();
192
-        InMemoryEmbeddingStore<TextSegment> embeddingStore = new InMemoryEmbeddingStore<>();
193
-        embeddingStore.addAll(embeddings, segments);
194
-        Embedding queryEmbedding = embeddingModel.embed(question).content();
195
-        EmbeddingSearchRequest embeddingSearchRequest = EmbeddingSearchRequest.builder()
196
-                .queryEmbedding(queryEmbedding)
197
-                .maxResults(1)
198
-                .build();
199
-        String contexts = embeddingStore.search(embeddingSearchRequest).matches().get(0).embedded().text();
200
-        String sb = "问题: " + question + "\n\n" +
201
-                "根据以下上下文回答问题:\n\n" +
202
-                "文件" + ": " +
203
-                filename + "\n\n" +
204
-                "上下文" + ": " +
205
-                contexts + "\n\n";
206
-        return generateAnswer(topicId, sb, llmServiceUrl);
194
+    public Flux<AssistantMessage> generateAnswerWithDocument(EmbeddingModel embeddingModel, String chatId, String question, String llmServiceUrl) throws IOException {
195
+        String topicId = cmcChatService.selectCmcChatByChatId(chatId).getTopicId();
196
+        CmcDocument cmcDocument = new CmcDocument();
197
+        cmcDocument.setChatId(chatId);
198
+        List<CmcDocument> documentList = cmcDocumentService.selectCmcDocumentList(cmcDocument);
199
+        StringBuilder sb = new StringBuilder("问题: " + question + "\n\n").append("根据以下上下文回答问题:\n\n");
200
+        for (CmcDocument document : documentList) {
201
+            File profilePath = new File(RuoYiConfig.getProfile() + "/upload/rag/document/" + document.getPath());
202
+            List<TextSegment> segments = splitDocument(document.getPath(), profilePath);
203
+            List<Embedding> embeddings = embeddingModel.embedAll(segments).content();
204
+            InMemoryEmbeddingStore<TextSegment> embeddingStore = new InMemoryEmbeddingStore<>();
205
+            embeddingStore.addAll(embeddings, segments);
206
+            Embedding queryEmbedding = embeddingModel.embed(question).content();
207
+            EmbeddingSearchRequest embeddingSearchRequest = EmbeddingSearchRequest.builder()
208
+                    .queryEmbedding(queryEmbedding)
209
+                    .maxResults(1)
210
+                    .build();
211
+            String contexts = embeddingStore.search(embeddingSearchRequest).matches().get(0).embedded().text();
212
+            sb.append("文件").append(": ")
213
+                    .append(document.getPath()).append("\n\n")
214
+                    .append("上下文").append(": ")
215
+                    .append(contexts).append("\n\n");
216
+        }
217
+        return generateAnswer(topicId, sb.toString(), llmServiceUrl);
207 218
     }
208 219
 
209 220
     /**
@@ -270,8 +281,4 @@ public class LangChainMilvusServiceImpl implements ILangChainMilvusService
270 281
         DocumentByParagraphSplitter splitter = new DocumentByParagraphSplitter(1000,200);
271 282
         return splitter.split(document);
272 283
     }
273
-    interface Assistant {
274
-
275
-        String chat(String message);
276
-    }
277 284
 }

+ 117
- 0
llm-back/vllm_server.py 查看文件

@@ -0,0 +1,117 @@
1
from vllm import LLM, SamplingParams
from fastapi import FastAPI, Request
from fastapi.responses import JSONResponse, StreamingResponse
import uvicorn
import time
import json


def create_vllm_server(
    model: str,
    served_model_name: str,
    host: str,  # unused inside; kept so CONFIG can be splatted as **kwargs
    port: int,  # unused inside; kept so CONFIG can be splatted as **kwargs
    tensor_parallel_size: int,
    top_p: float,
    temperature: float,
    max_tokens: int,
    gpu_memory_utilization: float,
    dtype: str,
) -> FastAPI:
    """Build a FastAPI app exposing an OpenAI-compatible /v1/chat/completions
    endpoint backed by an offline vLLM engine."""
    # Initialize the LLM engine once at startup.
    llm = LLM(
        model=model,
        tensor_parallel_size=tensor_parallel_size,
        gpu_memory_utilization=gpu_memory_utilization,
        dtype=dtype,
    )

    sampling_params = SamplingParams(
        temperature=temperature,
        top_p=top_p,
        max_tokens=max_tokens,
    )

    app = FastAPI()

    @app.post("/v1/chat/completions")
    async def chat_completions(request: Request):
        try:
            data = await request.json()
            messages = data["messages"]
            tools = data.get("tools")  # optional OpenAI-style tools parameter
            # OpenAI spec: "created" is an integer unix timestamp.
            created_time = int(time.time())
            request_id = f"chatcmpl-{created_time}"

            # NOTE(review): llm.chat() blocks until generation is complete, so the
            # "stream" branch below replays finished text rather than streaming
            # tokens as produced; true streaming needs vLLM's async engine.
            outputs = llm.chat(
                messages=messages,
                sampling_params=sampling_params,
                tools=tools,
            )
            if data.get("stream"):
                def generate():
                    full_text = ""
                    for output in outputs:
                        new_text = output.outputs[0].text[len(full_text):]
                        full_text = output.outputs[0].text
                        response_data = {
                            "id": request_id,
                            "model": served_model_name,
                            "created": created_time,
                            "choices": [{
                                "index": 0,
                                "delta": {"content": new_text},
                                "finish_reason": output.outputs[0].finish_reason,
                            }],
                        }
                        yield f"data: {json.dumps(response_data)}\n\n"
                    yield "data: [DONE]\n\n"

                return StreamingResponse(generate(), media_type="text/event-stream")
            return {
                "id": request_id,
                "model": served_model_name,
                "created": created_time,
                "choices": [{
                    "index": 0,
                    "message": {
                        "role": "assistant",
                        "content": outputs[0].outputs[0].text,
                    },
                    "finish_reason": outputs[0].outputs[0].finish_reason,
                }],
            }
        except Exception as e:
            # FastAPI does not interpret a (body, status) tuple as a status
            # override — the old `return {...}, 400` replied with HTTP 200.
            # Use an explicit JSONResponse so clients actually see HTTP 400.
            return JSONResponse(status_code=400, content={"error": str(e)})

    return app


if __name__ == "__main__":
    # Server configuration.
    CONFIG = {
        "model": "/mnt/d/Qwen/Qwen2.5-1.5B-Instruct",
        "served_model_name": "Qwen2.5-1.5B-Instruct",
        # "model": "/mnt/d/Deepseek/DeepSeek-R1-Distill-Qwen-1.5B",
        # "served_model_name": "DeepSeek-R1-Distill-Qwen-1.5B",
        "host": "172.25.231.226",
        "port": 8000,
        "tensor_parallel_size": 1,
        "top_p": 0.9,
        "temperature": 0.7,
        "max_tokens": 8192,
        "gpu_memory_utilization": 0.9,
        "dtype": "float16",
    }

    # Build the application, then serve it.
    app = create_vllm_server(**CONFIG)
    uvicorn.run(
        app,
        host=CONFIG["host"],
        port=CONFIG["port"],
        workers=1,
    )

Loading…
取消
儲存