|
|
@@ -11,7 +11,10 @@ import io.milvus.client.MilvusServiceClient;
|
|
11
|
11
|
import io.milvus.grpc.MutationResult;
|
|
12
|
12
|
import io.milvus.grpc.SearchResults;
|
|
13
|
13
|
import io.milvus.param.ConnectParam;
|
|
|
14
|
+import io.milvus.param.MetricType;
|
|
14
|
15
|
import io.milvus.param.R;
|
|
|
16
|
+import io.milvus.param.RpcStatus;
|
|
|
17
|
+import io.milvus.param.collection.LoadCollectionParam;
|
|
15
|
18
|
import io.milvus.param.dml.InsertParam;
|
|
16
|
19
|
import io.milvus.param.dml.SearchParam;
|
|
17
|
20
|
import io.milvus.response.SearchResultsWrapper;
|
|
|
@@ -41,7 +44,7 @@ public class LangChainMilvusService {
|
|
41
|
44
|
.withPort(port)
|
|
42
|
45
|
.build()
|
|
43
|
46
|
);
|
|
44
|
|
- this.LLM_SERVICE_URL = "http://" + host + ":8000/generate";
|
|
|
47
|
+ this.LLM_SERVICE_URL = "http://" + host + ":8080/generate";
|
|
45
|
48
|
this.collectionName = collectionName;
|
|
46
|
49
|
this.embeddingModel = embeddingModel;
|
|
47
|
50
|
}
|
|
|
@@ -50,10 +53,11 @@ public class LangChainMilvusService {
|
|
50
|
53
|
// 加载文档
|
|
51
|
54
|
InputStream fileInputStream = new FileInputStream(file);
|
|
52
|
55
|
Document document = new ApachePdfBoxDocumentParser().parse(fileInputStream);
|
|
53
|
|
- DocumentByParagraphSplitter splitter = new DocumentByParagraphSplitter(150,10);;
|
|
|
56
|
+ DocumentByParagraphSplitter splitter = new DocumentByParagraphSplitter(1024,0);;
|
|
54
|
57
|
List<TextSegment> segments = splitter.split(document);
|
|
55
|
58
|
|
|
56
|
59
|
// 提取文本和生成嵌入
|
|
|
60
|
+ List<String> fileNames = new ArrayList<>();
|
|
57
|
61
|
List<String> texts = new ArrayList<>();
|
|
58
|
62
|
List<List<Float>> embeddings = new ArrayList<>();
|
|
59
|
63
|
|
|
|
@@ -61,15 +65,16 @@ public class LangChainMilvusService {
|
|
61
|
65
|
String text = segment.text();
|
|
62
|
66
|
if (text.trim().isEmpty())
|
|
63
|
67
|
continue;
|
|
|
68
|
+ fileNames.add(file.getName());
|
|
64
|
69
|
texts.add(text);
|
|
65
|
70
|
embeddings.add(embeddingModel.embed(text).content().vectorAsList());
|
|
66
|
71
|
}
|
|
67
|
72
|
|
|
68
|
73
|
// 准备插入数据
|
|
69
|
74
|
List<InsertParam.Field> fields = new ArrayList<>();
|
|
70
|
|
- fields.add(new InsertParam.Field("file_name", Arrays.asList(file.getName())));
|
|
71
|
|
- fields.add(new InsertParam.Field("content", Arrays.asList(texts)));
|
|
72
|
|
- fields.add(new InsertParam.Field("embedding", Arrays.asList(embeddings)));
|
|
|
75
|
+ fields.add(new InsertParam.Field("file_name", fileNames));
|
|
|
76
|
+ fields.add(new InsertParam.Field("content", texts));
|
|
|
77
|
+ fields.add(new InsertParam.Field("embedding", embeddings));
|
|
73
|
78
|
|
|
74
|
79
|
InsertParam insertParam = InsertParam.newBuilder()
|
|
75
|
80
|
.withCollectionName(collectionName)
|
|
|
@@ -86,13 +91,29 @@ public class LangChainMilvusService {
|
|
86
|
91
|
|
|
87
|
92
|
// 从Milvus检索相关文档
|
|
88
|
93
|
public List<String> retrieveFromMilvus(String query, int topK) throws IOException {
|
|
89
|
|
- List<Float> queryVector = embeddingModel.embed(query).content().vectorAsList();
|
|
|
94
|
+ List<List<Float>> queryVector = Arrays.asList(embeddingModel.embed(query).content().vectorAsList());
|
|
90
|
95
|
|
|
|
96
|
+ // 加载集合
|
|
|
97
|
+ LoadCollectionParam loadParam = LoadCollectionParam.newBuilder()
|
|
|
98
|
+ .withCollectionName(collectionName)
|
|
|
99
|
+ .build();
|
|
|
100
|
+
|
|
|
101
|
+ R<RpcStatus> loadResponse = milvusClient.loadCollection(loadParam);
|
|
|
102
|
+ if (loadResponse.getStatus() != R.Status.Success.getCode()) {
|
|
|
103
|
+ System.err.println("Failed to load collection: " + loadResponse.getMessage());
|
|
|
104
|
+ milvusClient.close();
|
|
|
105
|
+ return null;
|
|
|
106
|
+ }
|
|
|
107
|
+
|
|
|
108
|
+ // 构建SearchParam
|
|
91
|
109
|
SearchParam searchParam = SearchParam.newBuilder()
|
|
92
|
110
|
.withCollectionName(collectionName)
|
|
93
|
111
|
.withVectors(queryVector)
|
|
94
|
112
|
.withTopK(topK)
|
|
95
|
113
|
.withOutFields(Arrays.asList("content"))
|
|
|
114
|
+ .withVectorFieldName("embedding")
|
|
|
115
|
+ .withMetricType(MetricType.COSINE)
|
|
|
116
|
+ .withParams("{\"nprobe\": 1}")
|
|
96
|
117
|
.build();
|
|
97
|
118
|
|
|
98
|
119
|
R<SearchResults> response = milvusClient.search(searchParam);
|
|
|
@@ -110,7 +131,7 @@ public class LangChainMilvusService {
|
|
110
|
131
|
HttpUrl url = HttpUrl.parse(LLM_SERVICE_URL)
|
|
111
|
132
|
.newBuilder()
|
|
112
|
133
|
.addQueryParameter("prompt", prompt)
|
|
113
|
|
- .addQueryParameter("max_token", String.valueOf(512))
|
|
|
134
|
+ .addQueryParameter("max_token", String.valueOf(1024))
|
|
114
|
135
|
.build();
|
|
115
|
136
|
|
|
116
|
137
|
Request request = new Request.Builder()
|