Skip to content

Commit

Permalink
Merge pull request FlowiseAI#94 from FlowiseAI/feature/MetadataFilter
Browse files: browse the repository at this point in the history
Feature/Add metadata filter
  • Loading branch information
HenryHengZJ authored May 12, 2023
2 parents 7313cdd + f3201ae commit ad5845f
Show file tree
Hide file tree
Showing 21 changed files with 1,491 additions and 48 deletions.
34 changes: 30 additions & 4 deletions packages/components/nodes/documentloaders/Cheerio/Cheerio.ts
Original file line number Diff line number Diff line change
Expand Up @@ -31,12 +31,21 @@ class Cheerio_DocumentLoaders implements INode {
name: 'textSplitter',
type: 'TextSplitter',
optional: true
},
{
label: 'Metadata',
name: 'metadata',
type: 'json',
optional: true,
additionalParams: true
}
]
}

async init(nodeData: INodeData): Promise<any> {
const textSplitter = nodeData.inputs?.textSplitter as TextSplitter
const metadata = nodeData.inputs?.metadata

let url = nodeData.inputs?.url as string

var urlPattern = new RegExp(
Expand All @@ -50,14 +59,31 @@ class Cheerio_DocumentLoaders implements INode {
) // validate fragment locator

const loader = new CheerioWebBaseLoader(urlPattern.test(url.trim()) ? url.trim() : '')
let docs = []

if (textSplitter) {
const docs = await loader.loadAndSplit(textSplitter)
return docs
docs = await loader.loadAndSplit(textSplitter)
} else {
const docs = await loader.load()
return docs
docs = await loader.load()
}

if (metadata) {
const parsedMetadata = typeof metadata === 'object' ? metadata : JSON.parse(metadata)
let finaldocs = []
for (const doc of docs) {
const newdoc = {
...doc,
metadata: {
...doc.metadata,
...parsedMetadata
}
}
finaldocs.push(newdoc)
}
return finaldocs
}

return docs
}
}

Expand Down
33 changes: 29 additions & 4 deletions packages/components/nodes/documentloaders/Csv/Csv.ts
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,13 @@ class Csv_DocumentLoaders implements INode {
description: 'Extracting a single column',
placeholder: 'Enter column name',
optional: true
},
{
label: 'Metadata',
name: 'metadata',
type: 'json',
optional: true,
additionalParams: true
}
]
}
Expand All @@ -49,17 +56,35 @@ class Csv_DocumentLoaders implements INode {
const textSplitter = nodeData.inputs?.textSplitter as TextSplitter
const csvFileBase64 = nodeData.inputs?.csvFile as string
const columnName = nodeData.inputs?.columnName as string
const metadata = nodeData.inputs?.metadata

const blob = new Blob(getBlob(csvFileBase64))
const loader = new CSVLoader(blob, columnName.trim().length === 0 ? undefined : columnName.trim())
let docs = []

if (textSplitter) {
const docs = await loader.loadAndSplit(textSplitter)
return docs
docs = await loader.loadAndSplit(textSplitter)
} else {
const docs = await loader.load()
return docs
docs = await loader.load()
}

if (metadata) {
const parsedMetadata = typeof metadata === 'object' ? metadata : JSON.parse(metadata)
let finaldocs = []
for (const doc of docs) {
const newdoc = {
...doc,
metadata: {
...doc.metadata,
...parsedMetadata
}
}
finaldocs.push(newdoc)
}
return finaldocs
}

return docs
}
}

Expand Down
33 changes: 29 additions & 4 deletions packages/components/nodes/documentloaders/Docx/Docx.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,24 +33,49 @@ class Docx_DocumentLoaders implements INode {
name: 'textSplitter',
type: 'TextSplitter',
optional: true
},
{
label: 'Metadata',
name: 'metadata',
type: 'json',
optional: true,
additionalParams: true
}
]
}

/**
 * Loads a DOCX file into documents, optionally splitting them and merging
 * user-supplied metadata into every document.
 *
 * NOTE(review): this text appears to be a rendered diff with the +/- markers
 * stripped, so removed and added lines sit side by side in the branches
 * below — confirm against the actual file before treating it as compilable code.
 *
 * @param nodeData - node configuration; reads `inputs.textSplitter`,
 *   `inputs.docxFile` and `inputs.metadata`.
 * @returns the loaded documents, or copies of them with the extra metadata merged in.
 * @throws SyntaxError from `JSON.parse` when `metadata` is a non-object value
 *   that is not valid JSON.
 */
async init(nodeData: INodeData): Promise<any> {
const textSplitter = nodeData.inputs?.textSplitter as TextSplitter
const docxFileBase64 = nodeData.inputs?.docxFile as string
const metadata = nodeData.inputs?.metadata

// Wrap the result of getBlob(docxFileBase64) in a Blob and hand it to the loader.
const blob = new Blob(getBlob(docxFileBase64))
const loader = new DocxLoader(blob)
let docs = []

if (textSplitter) {
// NOTE(review): the next two lines look like the pre-change (removed) version,
// which returned early and so bypassed the metadata merge below.
const docs = await loader.loadAndSplit(textSplitter)
return docs
// Post-change version: assign to the outer `docs` and fall through.
docs = await loader.loadAndSplit(textSplitter)
} else {
// NOTE(review): same pattern — presumably removed lines, then their replacement.
const docs = await loader.load()
return docs
docs = await loader.load()
}

if (metadata) {
// Accept metadata either as an already-parsed object or as a JSON string.
const parsedMetadata = typeof metadata === 'object' ? metadata : JSON.parse(metadata)
let finaldocs = []
for (const doc of docs) {
// Build a shallow copy; keys in parsedMetadata override same-named keys
// already present in doc.metadata because they are spread last.
const newdoc = {
...doc,
metadata: {
...doc.metadata,
...parsedMetadata
}
}
finaldocs.push(newdoc)
}
return finaldocs
}

// No extra metadata supplied: return the documents as loaded.
return docs
}
}

Expand Down
33 changes: 29 additions & 4 deletions packages/components/nodes/documentloaders/Folder/Folder.ts
Original file line number Diff line number Diff line change
Expand Up @@ -37,13 +37,21 @@ class Folder_DocumentLoaders implements INode {
name: 'textSplitter',
type: 'TextSplitter',
optional: true
},
{
label: 'Metadata',
name: 'metadata',
type: 'json',
optional: true,
additionalParams: true
}
]
}

async init(nodeData: INodeData): Promise<any> {
const textSplitter = nodeData.inputs?.textSplitter as TextSplitter
const folderPath = nodeData.inputs?.folderPath as string
const metadata = nodeData.inputs?.metadata

const loader = new DirectoryLoader(folderPath, {
'.json': (path) => new JSONLoader(path),
Expand All @@ -53,14 +61,31 @@ class Folder_DocumentLoaders implements INode {
// @ts-ignore
'.pdf': (path) => new PDFLoader(path, { pdfjs: () => import('pdf-parse/lib/pdf.js/v1.10.100/build/pdf.js') })
})
let docs = []

if (textSplitter) {
const docs = await loader.loadAndSplit(textSplitter)
return docs
docs = await loader.loadAndSplit(textSplitter)
} else {
const docs = await loader.load()
return docs
docs = await loader.load()
}

if (metadata) {
const parsedMetadata = typeof metadata === 'object' ? metadata : JSON.parse(metadata)
let finaldocs = []
for (const doc of docs) {
const newdoc = {
...doc,
metadata: {
...doc.metadata,
...parsedMetadata
}
}
finaldocs.push(newdoc)
}
return finaldocs
}

return docs
}
}

Expand Down
33 changes: 29 additions & 4 deletions packages/components/nodes/documentloaders/Github/Github.ts
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,13 @@ class Github_DocumentLoaders implements INode {
name: 'textSplitter',
type: 'TextSplitter',
optional: true
},
{
label: 'Metadata',
name: 'metadata',
type: 'json',
optional: true,
additionalParams: true
}
]
}
Expand All @@ -54,6 +61,7 @@ class Github_DocumentLoaders implements INode {
const branch = nodeData.inputs?.branch as string
const accessToken = nodeData.inputs?.accessToken as string
const textSplitter = nodeData.inputs?.textSplitter as TextSplitter
const metadata = nodeData.inputs?.metadata

const options: GithubRepoLoaderParams = {
branch,
Expand All @@ -64,14 +72,31 @@ class Github_DocumentLoaders implements INode {
if (accessToken) options.accessToken = accessToken

const loader = new GithubRepoLoader(repoLink, options)
let docs = []

if (textSplitter) {
const docs = await loader.loadAndSplit(textSplitter)
return docs
docs = await loader.loadAndSplit(textSplitter)
} else {
const docs = await loader.load()
return docs
docs = await loader.load()
}

if (metadata) {
const parsedMetadata = typeof metadata === 'object' ? metadata : JSON.parse(metadata)
let finaldocs = []
for (const doc of docs) {
const newdoc = {
...doc,
metadata: {
...doc.metadata,
...parsedMetadata
}
}
finaldocs.push(newdoc)
}
return finaldocs
}

return docs
}
}

Expand Down
33 changes: 29 additions & 4 deletions packages/components/nodes/documentloaders/Json/Json.ts
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,13 @@ class Json_DocumentLoaders implements INode {
description: 'Extracting multiple pointers',
placeholder: 'Enter pointers name',
optional: true
},
{
label: 'Metadata',
name: 'metadata',
type: 'json',
optional: true,
additionalParams: true
}
]
}
Expand All @@ -49,6 +56,7 @@ class Json_DocumentLoaders implements INode {
const textSplitter = nodeData.inputs?.textSplitter as TextSplitter
const jsonFileBase64 = nodeData.inputs?.jsonFile as string
const pointersName = nodeData.inputs?.pointersName as string
const metadata = nodeData.inputs?.metadata

let pointers: string[] = []
if (pointersName) {
Expand All @@ -58,14 +66,31 @@ class Json_DocumentLoaders implements INode {

const blob = new Blob(getBlob(jsonFileBase64))
const loader = new JSONLoader(blob, pointers.length != 0 ? pointers : undefined)
let docs = []

if (textSplitter) {
const docs = await loader.loadAndSplit(textSplitter)
return docs
docs = await loader.loadAndSplit(textSplitter)
} else {
const docs = await loader.load()
return docs
docs = await loader.load()
}

if (metadata) {
const parsedMetadata = typeof metadata === 'object' ? metadata : JSON.parse(metadata)
let finaldocs = []
for (const doc of docs) {
const newdoc = {
...doc,
metadata: {
...doc.metadata,
...parsedMetadata
}
}
finaldocs.push(newdoc)
}
return finaldocs
}

return docs
}
}

Expand Down
33 changes: 29 additions & 4 deletions packages/components/nodes/documentloaders/Notion/Notion.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,23 +33,48 @@ class Notion_DocumentLoaders implements INode {
name: 'textSplitter',
type: 'TextSplitter',
optional: true
},
{
label: 'Metadata',
name: 'metadata',
type: 'json',
optional: true,
additionalParams: true
}
]
}

/**
 * Loads documents through NotionLoader from the configured folder, optionally
 * splitting them and merging user-supplied metadata into every document.
 *
 * NOTE(review): this text appears to be a rendered diff with the +/- markers
 * stripped, so removed and added lines sit side by side in the branches
 * below — confirm against the actual file before treating it as compilable code.
 *
 * @param nodeData - node configuration; reads `inputs.textSplitter`,
 *   `inputs.notionFolder` and `inputs.metadata`.
 * @returns the loaded documents, or copies of them with the extra metadata merged in.
 * @throws SyntaxError from `JSON.parse` when `metadata` is a non-object value
 *   that is not valid JSON.
 */
async init(nodeData: INodeData): Promise<any> {
const textSplitter = nodeData.inputs?.textSplitter as TextSplitter
const notionFolder = nodeData.inputs?.notionFolder as string
const metadata = nodeData.inputs?.metadata

const loader = new NotionLoader(notionFolder)
let docs = []

if (textSplitter) {
// NOTE(review): the next two lines look like the pre-change (removed) version,
// which returned early and so bypassed the metadata merge below.
const docs = await loader.loadAndSplit(textSplitter)
return docs
// Post-change version: assign to the outer `docs` and fall through.
docs = await loader.loadAndSplit(textSplitter)
} else {
// NOTE(review): same pattern — presumably removed lines, then their replacement.
const docs = await loader.load()
return docs
docs = await loader.load()
}

if (metadata) {
// Accept metadata either as an already-parsed object or as a JSON string.
const parsedMetadata = typeof metadata === 'object' ? metadata : JSON.parse(metadata)
let finaldocs = []
for (const doc of docs) {
// Build a shallow copy; keys in parsedMetadata override same-named keys
// already present in doc.metadata because they are spread last.
const newdoc = {
...doc,
metadata: {
...doc.metadata,
...parsedMetadata
}
}
finaldocs.push(newdoc)
}
return finaldocs
}

// No extra metadata supplied: return the documents as loaded.
return docs
}
}

Expand Down
Loading

0 comments on commit ad5845f

Please sign in to comment.