Skip to content

Commit

Permalink
tweaks for timeout issues to scraper
Browse files Browse the repository at this point in the history
  • Loading branch information
techwithtim committed Jun 23, 2023
1 parent c82a6a7 commit 611a819
Show file tree
Hide file tree
Showing 4 changed files with 14 additions and 13 deletions.
Binary file modified Backend/scraper/__pycache__/amazon.cpython-311.pyc
Binary file not shown.
Binary file modified Backend/scraper/__pycache__/main.cpython-311.pyc
Binary file not shown.
6 changes: 5 additions & 1 deletion Backend/scraper/amazon.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,11 @@ async def get_product(product_div):
     # Fetch all attributes and text at once
     image_url = await image_element.get_attribute('src') if image_element else None
     product_name = await name_element.inner_text() if name_element else None
-    product_price = float((await price_element.inner_text()).replace("$", "").replace(",", "")) if price_element else None
+    try:
+        print((await price_element.inner_text()).replace("$", "").replace(",", "").strip())
+        product_price = float((await price_element.inner_text()).replace("$", "").replace(",", "").strip()) if price_element else None
+    except:
+        product_price = None
     product_url = "/".join((await url_element.get_attribute('href')).split("/")[:4]) if url_element else None
     # stock = stock_element[0] if len(stock_element) > 0 else None

Expand Down
21 changes: 9 additions & 12 deletions Backend/scraper/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,9 @@

URLS = {
AMAZON: {
"search_field_id": "twotabsearchtextbox",
"search_button_id": "nav-search-submit-button",
"search_field_query": 'input[name="field-keywords"]',
"search_button_query": 'input[value="Go"]',
"product_selector": "div.s-card-container"
},
CANADA_COMPUTERS: {
"search_field_id": "cc_quick_search",
"search_button_id": "search_btn",
"product_selector": ""
}
}

Expand All @@ -37,14 +32,16 @@ def load_auth():

 async def search(metadata, page, search_text):
     print(f"Searching for {search_text} on {page.url}")
-    search_field_id = metadata.get("search_field_id")
-    search_button_id = metadata.get("search_button_id")
+    search_field_query = metadata.get("search_field_query")
+    search_button_query = metadata.get("search_button_query")
 
-    if search_field_id:
+    if search_field_query and search_button_query:
         print("Filling input field")
-        await page.fill(f"#{search_field_id}", search_text)
+        search_box = await page.wait_for_selector(search_field_query)
+        await search_box.type(search_text)
         print("Pressing search button")
-        await page.click(f"#{search_button_id}")
+        button = await page.wait_for_selector(search_button_query)
+        await button.click()
     else:
         raise Exception("Could not search.")

Expand Down

0 comments on commit 611a819

Please sign in to comment.