Skip to content

Commit

Permalink
add possibility to save the code
Browse files Browse the repository at this point in the history
  • Loading branch information
VinciGit00 committed Sep 24, 2024
1 parent ce841e2 commit bcf02e5
Show file tree
Hide file tree
Showing 3 changed files with 56 additions and 9 deletions.
1 change: 1 addition & 0 deletions examples/code_generation/simple_with_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ class Projects(BaseModel):
"validation": 3,
"semantic": 3
},
"output_file_name": "extracted_data.py"
}

# ************************************************
Expand Down
27 changes: 27 additions & 0 deletions extract_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
def extract_data(html: str) -> dict:
    """Extract project titles and descriptions from an HTML page.

    Every ``<div class="grid-item">`` is treated as one project entry. The
    title is read from its ``<h4 class="card-title">`` element and the
    description from its ``<p class="card-text">`` element.

    Args:
        html: Raw HTML markup to parse.

    Returns:
        A dictionary of the form
        ``{'projects': [{'title': ..., 'description': ...}, ...]}``.
        A field is ``None`` when the corresponding element is absent
        from an entry.
    """
    from bs4 import BeautifulSoup

    def text_of(parent, tag, css_class):
        # find() returns None when nothing matches; calling get_text() on
        # that would raise AttributeError and abort the whole extraction,
        # so a missing element is reported as None instead.
        node = parent.find(tag, class_=css_class)
        return node.get_text(strip=True) if node is not None else None

    # Parse the HTML content using the stdlib-backed parser
    soup = BeautifulSoup(html, 'html.parser')

    # One dictionary per project entry found in the page
    projects = [
        {
            'title': text_of(entry, 'h4', 'card-title'),
            'description': text_of(entry, 'p', 'card-text'),
        }
        for entry in soup.find_all('div', class_='grid-item')
    ]

    # Return the structured data as a dictionary matching the desired JSON schema
    return {'projects': projects}
37 changes: 28 additions & 9 deletions scrapegraphai/graphs/code_generator_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,17 +17,17 @@

class CodeGeneratorGraph(AbstractGraph):
"""
CodeGeneratorGraph is a script generator pipeline that generates the function extract_data(html: str) -> dict() for
extarcting the wanted informations from a HTML page. The code generated is in Python and uses the library BeautifulSoup.
It requires a user prompt, a source URL, and a output schema.
CodeGeneratorGraph is a script generator pipeline that generates the function extract_data(html: str) -> dict() for
extracting the wanted information from a HTML page. The code generated is in Python and uses the library BeautifulSoup.
It requires a user prompt, a source URL, and an output schema.
Attributes:
prompt (str): The prompt for the graph.
source (str): The source of the graph.
config (dict): Configuration parameters for the graph.
schema (BaseModel): The schema for the graph output.
llm_model: An instance of a language model client, configured for generating answers.
embedder_model: An instance of an embedding model client,
embedder_model: An instance of an embedding model client,
configured for generating embeddings.
verbose (bool): A flag indicating whether to show print statements during execution.
headless (bool): A flag indicating whether to run the graph in headless mode.
Expand Down Expand Up @@ -96,7 +96,6 @@ def _create_graph(self) -> BaseGraph:
"schema": self.schema,
}
)

prompt_refier_node = PromptRefinerNode(
input="user_prompt",
output=["refined_prompt"],
Expand All @@ -106,7 +105,6 @@ def _create_graph(self) -> BaseGraph:
"schema": self.schema
}
)

html_analyzer_node = HtmlAnalyzerNode(
input="refined_prompt & original_html",
output=["html_info", "reduced_html"],
Expand All @@ -117,7 +115,6 @@ def _create_graph(self) -> BaseGraph:
"reduction": self.config.get("reduction", 0)
}
)

generate_code_node = GenerateCodeNode(
input="user_prompt & refined_prompt & html_info & reduced_html & answer",
output=["generated_code"],
Expand Down Expand Up @@ -166,4 +163,26 @@ def run(self) -> str:
inputs = {"user_prompt": self.prompt, self.input_key: self.source}
self.final_state, self.execution_info = self.graph.execute(inputs)

return self.final_state.get("generated_code", "No code created.")
generated_code = self.final_state.get("generated_code", "No code created.")

if self.config.get("filename") is None:
filename = "extracted_data.py"
elif ".py" not in self.config.get("filename"):
filename += ".py"
else:
filename = self.config.get("filename")

self.save_code_to_file(generated_code, filename)

return generated_code

def save_code_to_file(self, code: str, filename:str) -> None:
    """
    Saves the generated code to a Python file.

    The file is created, or overwritten if it already exists, and is
    always written as UTF-8 so that non-ASCII characters in the generated
    code do not depend on the platform's default encoding.

    Args:
        code (str): The generated code to be saved.
        filename (str): name of the output file
    """
    # UTF-8 is Python's default source encoding; being explicit avoids
    # UnicodeEncodeError on platforms whose locale encoding is narrower.
    with open(filename, "w", encoding="utf-8") as file:
        file.write(code)

0 comments on commit bcf02e5

Please sign in to comment.