Skip to content

Commit

Permalink
fix: add json integration
Browse files: browse the repository at this point in the history
  • Loading branch information
VinciGit00 committed May 9, 2024
1 parent 28c9dce commit 0ab31c3
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 4 deletions.
4 changes: 2 additions & 2 deletions scrapegraphai/graphs/json_scraper_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ def _create_graph(self) -> BaseGraph:
"""

fetch_node = FetchNode(
input="json_dir",
input="json",
output=["doc"],
)
parse_node = ParseNode(
Expand Down Expand Up @@ -106,4 +106,4 @@ def run(self) -> str:
inputs = {"user_prompt": self.prompt, self.input_key: self.source}
self.final_state, self.execution_info = self.graph.execute(inputs)

return self.final_state.get("answer", "No answer found.")
return self.final_state.get("answer", "No answer found.")
10 changes: 8 additions & 2 deletions scrapegraphai/nodes/fetch_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
FetchNode Module
"""
import pandas as pd
import json
from typing import List, Optional
from langchain_community.document_loaders import AsyncChromiumLoader
from langchain_core.documents import Document
Expand Down Expand Up @@ -75,8 +76,13 @@ def execute(self, state):
compressed_document = loader.load()

elif self.input == "csv":
compressed_document = [Document(page_content=pd.read_csv(source), metadata={
"source": "xml"
compressed_document = [Document(page_content=str(pd.read_csv(source)), metadata={
"source": "csv"
})]
elif self.input == "json":
f = open(source)
compressed_document = [Document(page_content=str(json.load(f)), metadata={
"source": "json"
})]
elif self.input == "xml":
with open(source, 'r', encoding='utf-8') as f:
Expand Down

0 comments on commit 0ab31c3

Please sign in to comment.