-
Notifications
You must be signed in to change notification settings - Fork 1.2k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
ce841e2
commit bcf02e5
Showing 3 changed files with 56 additions and 9 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
def extract_data(html: str) -> dict:
    """Extract project titles and descriptions from an HTML document.

    Every ``<div class="grid-item">`` is treated as one project entry. The
    title is taken from the first ``<h4 class="card-title">`` inside the
    entry and the description from the first ``<p class="card-text">``.

    Args:
        html: Raw HTML markup to parse.

    Returns:
        A dict of the form
        ``{'projects': [{'title': str, 'description': str}, ...]}`` with one
        item per matching entry, in document order. If an entry lacks the
        title or description element, that field is an empty string instead
        of raising ``AttributeError``.
    """
    # Function-scope import: bs4 is loaded on first call rather than at
    # module import time.
    from bs4 import BeautifulSoup

    soup = BeautifulSoup(html, 'html.parser')

    def _text_of(entry, tag: str, css_class: str) -> str:
        """Return the stripped text of the first matching child, or ''."""
        node = entry.find(tag, class_=css_class)
        # find() yields None when nothing matches; guard before get_text().
        if node is None:
            return ''
        return node.get_text(strip=True)

    projects = [
        {
            'title': _text_of(entry, 'h4', 'card-title'),
            'description': _text_of(entry, 'p', 'card-text'),
        }
        for entry in soup.find_all('div', class_='grid-item')
    ]

    return {'projects': projects}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters