-
Notifications
You must be signed in to change notification settings - Fork 0
/
C_Get_the_right_number_of_site.py
59 lines (46 loc) · 1.71 KB
/
C_Get_the_right_number_of_site.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
'''
This module imports the function that loads the JSON file of functioning
sites (that JSON file is created by the script named: Get_list_functional sites)
and finds the number of the site whose page contains a given rank -- in our
case the 250th person. The rank to search for can be changed.
'''
import requests
from bs4 import BeautifulSoup
from B_load_json_file_fction import load_sites_to_visit
'''
sites_to_visit = load_sites_to_visit('sites_to_visit.json')
'''
def find_valid_sites(sites_to_visit, number, timeout=10):
    """
    Find the first site whose page lists the given rank and return its number.

    The URLs are downloaded in the order given and parsed with BeautifulSoup.
    A page matches when the stripped text of any ``<td class="rank">`` cell is
    exactly ``str(number)``. The search stops at the first matching site.
    Prints "No valid site found." when no page matches.

    Arguments:
    - sites_to_visit (list): URLs to visit, in the order they should be tried.
    - number (str or int): The rank to search for; it is compared as a string.
    - timeout (float or tuple): Timeout passed to ``requests.get`` for every
      request, so an unresponsive server cannot block the search forever.

    Returns:
    - extracted_number (str): The second-to-last '/'-separated segment of the
      matching site's URL, or None if no site matches.

    Raises:
    - Exceptions raised by ``requests.get`` (for example on a connection
      failure or a timeout) are not caught here and propagate to the caller.
    """
    # Convert once instead of on every cell comparison.
    wanted = str(number)

    for site in sites_to_visit:
        response = requests.get(site, timeout=timeout)
        soup = BeautifulSoup(response.content, 'lxml')
        rank_cells = soup.find_all('td', class_='rank')

        # Exact match only, so searching for 25 does not match a cell "250".
        if any(cell.get_text().strip() == wanted for cell in rank_cells):
            # NOTE(review): [-2] presumably relies on the URL ending with '/'
            # (or a file name) after the site number -- confirm against the
            # URLs stored in the JSON file.
            return site.split('/')[-2]

    print("No valid site found.")
    return None
# Example usage (disabled: kept inside a string literal so it is not executed):
'''
number_to_search = 250
extracted_number = find_valid_sites(sites_to_visit, number_to_search)
print(extracted_number)
N = extracted_number
'''