Skip to content

Commit

Permalink
Add mongo index (labring#519)
Browse files Browse the repository at this point in the history
  • Loading branch information
c121914yu committed Nov 26, 2023
1 parent f818260 commit 933c3fd
Show file tree
Hide file tree
Showing 7 changed files with 61 additions and 31 deletions.
6 changes: 3 additions & 3 deletions packages/service/common/mongo/init.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,9 @@ export async function connectMongo({
bufferCommands: true,
maxConnecting: Number(process.env.DB_MAX_LINK || 5),
maxPoolSize: Number(process.env.DB_MAX_LINK || 5),
minPoolSize: 2,
connectTimeoutMS: 20000,
waitQueueTimeoutMS: 20000
minPoolSize: Number(process.env.DB_MAX_LINK || 10) * 0.5,
connectTimeoutMS: 60000,
waitQueueTimeoutMS: 60000
});

console.log('mongo connected');
Expand Down
1 change: 1 addition & 0 deletions packages/service/core/dataset/collection/schema.ts
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ const DatasetCollectionSchema = new Schema({

try {
DatasetCollectionSchema.index({ datasetId: 1 });
DatasetCollectionSchema.index({ datasetId: 1, parentId: 1 });
DatasetCollectionSchema.index({ updateTime: -1 });
} catch (error) {
console.log(error);
Expand Down
3 changes: 2 additions & 1 deletion packages/service/core/dataset/data/schema.ts
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,8 @@ try {
DatasetDataSchema.index({ datasetId: 1 });
DatasetDataSchema.index({ collectionId: 1 });
// full text index
DatasetDataSchema.index({ fullTextToken: 'text' });
DatasetDataSchema.index({ datasetId: 1, fullTextToken: 'text' });
DatasetDataSchema.index({ fullTextToken: 1 });
} catch (error) {
console.log(error);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -283,21 +283,21 @@ function datasetTemplate(formData: AppSimpleEditFormType): ModuleItemType[] {
value: formData.dataset.datasets,
type: FlowNodeInputTypeEnum.custom,
label: '关联的知识库',
connected: true
connected: false
},
{
key: 'similarity',
value: formData.dataset.similarity,
type: FlowNodeInputTypeEnum.slider,
label: '相似度',
connected: true
connected: false
},
{
key: 'limit',
value: formData.dataset.limit,
type: FlowNodeInputTypeEnum.slider,
label: '单次搜索上限',
connected: true
connected: false
},
{
key: 'switch',
Expand All @@ -317,7 +317,7 @@ function datasetTemplate(formData: AppSimpleEditFormType): ModuleItemType[] {
label: '结果重排',
description: '将召回的结果进行进一步重排,可增加召回率',
plusField: true,
connected: true,
connected: false,
value: formData.dataset.rerank
}
],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -392,7 +392,7 @@ function ConfigForm({
})
}
>
<Image alt={''} src={item.avatar} w={'18px'} mr={1} />
<Avatar src={item.avatar} w={'18px'} mr={1} />
<Box flex={'1 0 0'} w={0} className={'textEllipsis'} fontSize={'sm'}>
{item.name}
</Box>
Expand Down
70 changes: 49 additions & 21 deletions projects/app/src/service/core/dataset/data/pg.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { PgDatasetTableName } from '@fastgpt/global/core/dataset/constant';
import type {
DatasetDataWithCollectionType,
DatasetDataSchemaType,
SearchDataResponseItemType
} from '@fastgpt/global/core/dataset/type.d';
import { PgClient } from '@fastgpt/service/common/pg';
Expand Down Expand Up @@ -298,30 +298,58 @@ export async function fullTextRecall({
};
}

const result = (await MongoDatasetData.find(
let searchResults = (
await Promise.all(
datasetIds.map((id) =>
MongoDatasetData.find(
{
datasetId: id,
$text: { $search: jiebaSplit({ text }) }
},
{
score: { $meta: 'textScore' },
_id: 1,
datasetId: 1,
collectionId: 1,
q: 1,
a: 1,
indexes: 1
}
)
.sort({ score: { $meta: 'textScore' } })
.limit(limit)
.lean()
)
)
).flat() as (DatasetDataSchemaType & { score: number })[];

// Merge step: every per-dataset query above is individually capped at `limit`,
// so the flattened list can contain more than `limit` hits in total.
// Rank all hits by their text score (highest first) and keep only the top `limit`.
searchResults.sort((a, b) => b.score - a.score);
// Array.prototype.slice returns a new array and leaves the receiver untouched,
// so the truncated list has to be assigned back for the cap to take effect.
searchResults = searchResults.slice(0, limit);

const collections = await MongoDatasetCollection.find(
{
datasetId: { $in: datasetIds.map((item) => item) },
$text: { $search: jiebaSplit({ text }) }
_id: { $in: searchResults.map((item) => item.collectionId) }
},
{ score: { $meta: 'textScore' } }
)
.sort({ score: { $meta: 'textScore' } })
.limit(limit)
.populate('collectionId')
.lean()) as DatasetDataWithCollectionType[];
'_id name metadata'
);

return {
fullTextRecallResults: result.map((item) => ({
id: String(item._id),
datasetId: String(item.datasetId),
collectionId: String(item.collectionId._id),
sourceName: item.collectionId.name || '',
sourceId: item.collectionId.metadata?.fileId || item.collectionId.metadata?.rawLink,
q: item.q,
a: item.a,
indexes: item.indexes,
score: 1
})),
fullTextRecallResults: searchResults.map((item) => {
const collection = collections.find((col) => String(col._id) === String(item.collectionId));
return {
id: String(item._id),
datasetId: String(item.datasetId),
collectionId: String(item.collectionId),
sourceName: collection?.name || '',
sourceId: collection?.metadata?.fileId || collection?.metadata?.rawLink,
q: item.q,
a: item.a,
indexes: item.indexes,
// @ts-ignore
score: item.score
};
}),
tokenLen: 0
};
}
Expand Down
2 changes: 1 addition & 1 deletion projects/app/src/web/core/app/templates.ts
Original file line number Diff line number Diff line change
Expand Up @@ -225,7 +225,7 @@ export const appTemplates: (AppItemType & {
]
},
{
id: 'simpleKbChat',
id: 'simpleDatasetChat',
avatar: '/imgs/module/db.png',
name: '知识库 + 对话引导',
intro: '每次提问时进行一次知识库搜索,将搜索结果注入 LLM 模型进行参考回答',
Expand Down

0 comments on commit 933c3fd

Please sign in to comment.