Tweaks to the scraper to address timeout issues

mokhairy2019 · Jun 23, 2023 · commit 611a819
1 parent c82a6a7
commit 611a819
Show file tree

Hide file tree

Showing 4 changed files with 14 additions and 13 deletions.
diff --git a/Backend/scraper/__pycache__/amazon.cpython-311.pyc b/Backend/scraper/__pycache__/amazon.cpython-311.pyc
diff --git a/Backend/scraper/__pycache__/main.cpython-311.pyc b/Backend/scraper/__pycache__/main.cpython-311.pyc
diff --git a/Backend/scraper/amazon.py b/Backend/scraper/amazon.py
@@ -28,7 +28,11 @@ async def get_product(product_div):
     # Fetch all attributes and text at once
     image_url = await image_element.get_attribute('src') if image_element else None
     product_name = await name_element.inner_text() if name_element else None
-    product_price = float((await price_element.inner_text()).replace("$", "").replace(",", "")) if price_element else None
+    try:
+        print((await price_element.inner_text()).replace("$", "").replace(",", "").strip())
+        product_price = float((await price_element.inner_text()).replace("$", "").replace(",", "").strip()) if price_element else None
+    except:
+        product_price = None
     product_url = "/".join((await url_element.get_attribute('href')).split("/")[:4]) if url_element else None
     # stock = stock_element[0] if len(stock_element) > 0 else None
 

diff --git a/Backend/scraper/main.py b/Backend/scraper/main.py
@@ -10,14 +10,9 @@
 
 URLS = {
     AMAZON: {
-        "search_field_id": "twotabsearchtextbox",
-        "search_button_id": "nav-search-submit-button",
+        "search_field_query": 'input[name="field-keywords"]',
+        "search_button_query": 'input[value="Go"]',
         "product_selector": "div.s-card-container"
-    },
-    CANADA_COMPUTERS: {
-        "search_field_id": "cc_quick_search",
-        "search_button_id": "search_btn",
-        "product_selector": ""
     }
 }
 
@@ -37,14 +32,16 @@ def load_auth():
 
 async def search(metadata, page, search_text):
     print(f"Searching for {search_text} on {page.url}")
-    search_field_id = metadata.get("search_field_id")
-    search_button_id = metadata.get("search_button_id")
+    search_field_query = metadata.get("search_field_query")
+    search_button_query = metadata.get("search_button_query")
 
-    if search_field_id:
+    if search_field_query and search_button_query:
         print("Filling input field")
-        await page.fill(f"#{search_field_id}", search_text)
+        search_box = await page.wait_for_selector(search_field_query)
+        await search_box.type(search_text)
         print("Pressing search button")
-        await page.click(f"#{search_button_id}")
+        button = await page.wait_for_selector(search_button_query)
+        await button.click()
     else:
         raise Exception("Could not search.")