Skip to content

Commit a84007a

Browse files
authored
Dataset collection forbid (#1883)
* Update 484.md * Update 485.md * Update README.md * Update config.yml * perf: tool call support same id * feat: collection forbid * feat: collection forbid
1 parent 96a03e9 commit a84007a

File tree

37 files changed

+615
-451
lines changed

37 files changed

+615
-451
lines changed

.github/ISSUE_TEMPLATE/config.yml

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
blank_issues_enabled: false
22
contact_links:
3-
- name: 微信交流群
4-
url: https://oss.laf.run/htr4n1-images/fastgpt-qr-code.jpg
3+
- name: 飞书话题群
4+
url: https://oss.laf.run/otnvvf-imgs/1719505774252.jpg
55
about: FastGPT 全是问题群

README.md

+2-2
Original file line numberDiff line numberDiff line change
@@ -120,9 +120,9 @@ https://github.com/labring/FastGPT/assets/15308462/7d3a38df-eb0e-4388-9250-2409b
120120

121121
## 🏘️ 社区交流群
122122

123-
wx 扫一下加入
123+
扫码加入飞书话题群(新开,逐渐弃用微信群)
124124

125-
![](https://oss.laf.run/htr4n1-images/wechat-qr-code.jpg)
125+
![](https://oss.laf.run/otnvvf-imgs/1719505774252.jpg)
126126

127127
<a href="#readme">
128128
<img src="https://img.shields.io/badge/-返回顶部-7d09f1.svg" alt="#" align="right">

docSite/content/zh-cn/docs/development/upgrading/484.md

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
---
2-
title: 'V4.8.4'
2+
title: 'V4.8.4(需要初始化)'
33
description: 'FastGPT V4.8.4 更新说明'
44
icon: 'upgrade'
55
draft: false
@@ -35,4 +35,4 @@ curl --location --request POST 'https://{{host}}/api/admin/init/484' \
3535
6. 修复 - 定时执行初始化错误。
3636
7. 修复 - 应用调用传参异常。
3737
8. 修复 - ctrl + cv 复杂节点时,nodeId错误。
38-
9. 调整组件库全局theme。
38+
9. 调整组件库全局theme。

docSite/content/zh-cn/docs/development/upgrading/485.md

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
---
2-
title: 'V4.8.5'
2+
title: 'V4.8.5(需要初始化)'
33
description: 'FastGPT V4.8.5 更新说明'
44
icon: 'upgrade'
55
draft: false
@@ -58,4 +58,4 @@ curl --location --request POST 'https://{{host}}/api/admin/init/485' \
5858
12. 修复 - 定时任务无法实际关闭
5959
13. 修复 - 输入引导特殊字符导致正则报错
6060
14. 修复 - 文件包含特殊字符`%`,且未转义时会导致页面崩溃
61-
15. 修复 - 自定义输入选择知识库引用时页面崩溃
61+
15. 修复 - 自定义输入选择知识库引用时页面崩溃
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
---
2+
title: 'V4.8.6(进行中)'
3+
description: 'FastGPT V4.8.6 更新说明'
4+
icon: 'upgrade'
5+
draft: false
6+
toc: true
7+
weight: 818
8+
---
9+
10+
11+
## V4.8.6 更新说明
12+
13+
1. 新增 - 知识库支持单个集合禁用功能
14+
2.

packages/global/core/dataset/collection/utils.ts

+11
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import { DatasetCollectionTypeEnum, TrainingModeEnum, TrainingTypeMap } from '../constants';
12
import { CollectionWithDatasetType, DatasetCollectionSchemaType } from '../type';
23

34
export const getCollectionSourceData = (
@@ -12,3 +13,13 @@ export const getCollectionSourceData = (
1213
sourceName: collection?.name || ''
1314
};
1415
};
16+
17+
export const checkCollectionIsFolder = (type: DatasetCollectionTypeEnum) => {
18+
return type === DatasetCollectionTypeEnum.folder || type === DatasetCollectionTypeEnum.virtual;
19+
};
20+
21+
export const getTrainingTypeLabel = (type?: TrainingModeEnum) => {
22+
if (!type) return '';
23+
if (!TrainingTypeMap[type]) return '';
24+
return TrainingTypeMap[type].label;
25+
};

packages/global/core/dataset/type.d.ts

+2
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ export type DatasetCollectionSchemaType = {
4848
type: DatasetCollectionTypeEnum;
4949
createTime: Date;
5050
updateTime: Date;
51+
forbid?: boolean;
5152

5253
trainingType: TrainingModeEnum;
5354
chunkSize: number;
@@ -89,6 +90,7 @@ export type DatasetDataSchemaType = {
8990
updateTime: Date;
9091
q: string; // large chunks or question
9192
a: string; // answer or custom content
93+
forbid?: boolean;
9294
fullTextToken: string;
9395
indexes: DatasetDataIndexItemType[];
9496
rebuilding?: boolean;

packages/service/common/vectorStore/controller.d.ts

+3
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,9 @@ export type InsertVectorControllerProps = InsertVectorProps & {
2424
export type EmbeddingRecallProps = {
2525
teamId: string;
2626
datasetIds: string[];
27+
28+
forbidCollectionIdList: string[];
29+
// forbidEmbIndexIdList: string[];
2730
// similarity?: number;
2831
// efSearch?: number;
2932
};

packages/service/common/vectorStore/milvus/class.ts

+7-2
Original file line numberDiff line numberDiff line change
@@ -213,14 +213,19 @@ export class MilvusCtrl {
213213
};
214214
embRecall = async (props: EmbeddingRecallCtrlProps): Promise<EmbeddingRecallResponse> => {
215215
const client = await this.getClient();
216-
const { teamId, datasetIds, vector, limit, retry = 2 } = props;
216+
const { teamId, datasetIds, vector, limit, forbidCollectionIdList, retry = 2 } = props;
217+
218+
const forbidColQuery =
219+
forbidCollectionIdList.length > 0
220+
? `and (collectionId not in [${forbidCollectionIdList.map((id) => `"${String(id)}"`).join(',')}])`
221+
: '';
217222

218223
try {
219224
const { results } = await client.search({
220225
collection_name: DatasetVectorTableName,
221226
data: vector,
222227
limit,
223-
filter: `(teamId == "${teamId}") and (datasetId in [${datasetIds.map((id) => `"${String(id)}"`).join(',')}])`,
228+
filter: `(teamId == "${teamId}") and (datasetId in [${datasetIds.map((id) => `"${String(id)}"`).join(',')}]) ${forbidColQuery}`,
224229
output_fields: ['collectionId']
225230
});
226231

packages/service/common/vectorStore/pg/class.ts

+22-1
Original file line numberDiff line numberDiff line change
@@ -118,9 +118,29 @@ export class PgVectorCtrl {
118118
}
119119
};
120120
embRecall = async (props: EmbeddingRecallCtrlProps): Promise<EmbeddingRecallResponse> => {
121-
const { teamId, datasetIds, vector, limit, retry = 2 } = props;
121+
const { teamId, datasetIds, vector, limit, forbidCollectionIdList, retry = 2 } = props;
122+
123+
const forbidCollectionSql =
124+
forbidCollectionIdList.length > 0
125+
? `AND collection_id NOT IN (${forbidCollectionIdList.map((id) => `'${String(id)}'`).join(',')})`
126+
: 'AND collection_id IS NOT NULL';
127+
// const forbidDataSql =
128+
// forbidEmbIndexIdList.length > 0 ? `AND id NOT IN (${forbidEmbIndexIdList.join(',')})` : '';
122129

123130
try {
131+
// const explan: any = await PgClient.query(
132+
// `BEGIN;
133+
// SET LOCAL hnsw.ef_search = ${global.systemEnv?.pgHNSWEfSearch || 100};
134+
// EXPLAIN ANALYZE select id, collection_id, vector <#> '[${vector}]' AS score
135+
// from ${DatasetVectorTableName}
136+
// where team_id='${teamId}'
137+
// AND dataset_id IN (${datasetIds.map((id) => `'${String(id)}'`).join(',')})
138+
// ${forbidCollectionSql}
139+
// order by score limit ${limit};
140+
// COMMIT;`
141+
// );
142+
// console.log(explan[2].rows);
143+
124144
const results: any = await PgClient.query(
125145
`
126146
BEGIN;
@@ -129,6 +149,7 @@ export class PgVectorCtrl {
129149
from ${DatasetVectorTableName}
130150
where team_id='${teamId}'
131151
AND dataset_id IN (${datasetIds.map((id) => `'${String(id)}'`).join(',')})
152+
${forbidCollectionSql}
132153
order by score limit ${limit};
133154
COMMIT;`
134155
);

packages/service/core/dataset/collection/schema.ts

+20-15
Original file line numberDiff line numberDiff line change
@@ -48,12 +48,15 @@ const DatasetCollectionSchema = new Schema({
4848
type: Date,
4949
default: () => new Date()
5050
},
51+
forbid: {
52+
type: Boolean,
53+
default: false
54+
},
5155

5256
// chunk field
5357
trainingType: {
5458
type: String,
55-
enum: Object.keys(TrainingTypeMap),
56-
required: true
59+
enum: Object.keys(TrainingTypeMap)
5760
},
5861
chunkSize: {
5962
type: Number,
@@ -91,23 +94,25 @@ const DatasetCollectionSchema = new Schema({
9194
}
9295
});
9396

97+
export const MongoDatasetCollection: Model<DatasetCollectionSchemaType> =
98+
models[DatasetColCollectionName] || model(DatasetColCollectionName, DatasetCollectionSchema);
99+
94100
try {
95101
// auth file
96-
DatasetCollectionSchema.index({ teamId: 1, fileId: 1 }, { background: true });
102+
DatasetCollectionSchema.index({ teamId: 1, fileId: 1 });
97103

98104
// list collection; deep find collections
99-
DatasetCollectionSchema.index(
100-
{
101-
teamId: 1,
102-
datasetId: 1,
103-
parentId: 1,
104-
updateTime: -1
105-
},
106-
{ background: true }
107-
);
105+
DatasetCollectionSchema.index({
106+
teamId: 1,
107+
datasetId: 1,
108+
parentId: 1,
109+
updateTime: -1
110+
});
111+
112+
// get forbid
113+
// DatasetCollectionSchema.index({ teamId: 1, datasetId: 1, forbid: 1 });
114+
115+
MongoDatasetCollection.syncIndexes({ background: true });
108116
} catch (error) {
109117
console.log(error);
110118
}
111-
112-
export const MongoDatasetCollection: Model<DatasetCollectionSchemaType> =
113-
models[DatasetColCollectionName] || model(DatasetColCollectionName, DatasetCollectionSchema);

packages/service/core/dataset/data/schema.ts

+16-16
Original file line numberDiff line numberDiff line change
@@ -77,27 +77,27 @@ const DatasetDataSchema = new Schema({
7777
rebuilding: Boolean
7878
});
7979

80+
export const MongoDatasetData: Model<DatasetDataSchemaType> =
81+
models[DatasetDataCollectionName] || model(DatasetDataCollectionName, DatasetDataSchema);
82+
8083
try {
8184
// list collection and count data; list data; delete collection(relate data)
82-
DatasetDataSchema.index(
83-
{ teamId: 1, datasetId: 1, collectionId: 1, chunkIndex: 1, updateTime: -1 },
84-
{ background: true }
85-
);
85+
DatasetDataSchema.index({
86+
teamId: 1,
87+
datasetId: 1,
88+
collectionId: 1,
89+
chunkIndex: 1,
90+
updateTime: -1
91+
});
8692
// full text index
87-
DatasetDataSchema.index({ teamId: 1, datasetId: 1, fullTextToken: 'text' }, { background: true });
93+
DatasetDataSchema.index({ teamId: 1, datasetId: 1, fullTextToken: 'text' });
8894
// Recall vectors after data matching
89-
DatasetDataSchema.index(
90-
{ teamId: 1, datasetId: 1, collectionId: 1, 'indexes.dataId': 1 },
91-
{ background: true }
92-
);
93-
DatasetDataSchema.index({ updateTime: 1 }, { background: true });
95+
DatasetDataSchema.index({ teamId: 1, datasetId: 1, collectionId: 1, 'indexes.dataId': 1 });
96+
DatasetDataSchema.index({ updateTime: 1 });
9497
// rebuild data
95-
DatasetDataSchema.index({ rebuilding: 1, teamId: 1, datasetId: 1 }, { background: true });
98+
DatasetDataSchema.index({ rebuilding: 1, teamId: 1, datasetId: 1 });
99+
100+
MongoDatasetData.syncIndexes({ background: true });
96101
} catch (error) {
97102
console.log(error);
98103
}
99-
100-
export const MongoDatasetData: Model<DatasetDataSchemaType> =
101-
models[DatasetDataCollectionName] || model(DatasetDataCollectionName, DatasetDataSchema);
102-
103-
MongoDatasetData.syncIndexes();

0 commit comments

Comments
 (0)