-
Notifications
You must be signed in to change notification settings - Fork 1
/
Frankfurt.py
66 lines (57 loc) · 2.88 KB
/
Frankfurt.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
from bs4 import BeautifulSoup
from park_api.util import convert_date
from park_api.geodata import GeoData
import requests
# This loads the geodata for this city if <city>.geojson exists in the same directory as this file.
# No need to remove this if there's no geodata (yet), everything will still work.
# NOTE(review): parse_html in this file never reads `geodata`; it takes lot
# names and coordinates from a separately downloaded XML resource instead.
# Confirm whether anything outside this file still relies on this object.
geodata = GeoData(__file__)
# Companion XML resource that parse_html reads each lot's description and
# point coordinates from; the document passed to parse_html only supplies the
# capacity / occupancy / status figures, keyed by the same facility id.
_FACILITY_INFO_URL = (
    "http://offenedaten.frankfurt.de/dataset/"
    "e821f156-69cf-4dd0-9ffe-13d9d6218597/resource/"
    "eac5ca3d-4285-48f4-bfe3-d3116a262e5f/download/parkdatensta.xml"
)

# requests waits forever by default; bound the download so a stalled server
# cannot hang the scraper.
_REQUEST_TIMEOUT_SECONDS = 30


# This function is called by the scraper and given the data of the page specified as source in geojson.
# It's supposed to return a dictionary containing everything the current spec expects. Tests will fail if it doesn't ;)
def parse_html(html):
    """Turn the parking status document into the lot dictionary.

    Every ``parkingfacilitystatus`` element that is a direct child of
    ``parkingfacilitytablestatuspublication`` is matched, by the ``id`` of its
    ``parkingfacilityreference``, against the element carrying the same ``id``
    in the resource downloaded from ``_FACILITY_INFO_URL``. Status entries
    whose id is not present in that resource are skipped.

    Args:
        html: Markup of the status document (parsed with "html.parser", so
            all tag names are matched in lower case).

    Returns:
        A dict with ``"last_updated"`` (text of the first ``publicationtime``
        element up to its first ".") and ``"lots"``, a list of dicts with the
        keys ``name``, ``free``, ``total``, ``address``, ``coords``, ``state``,
        ``lot_type``, ``id`` and ``forecast``.

    Raises:
        IndexError: If the document has no ``publicationtime`` element.
        requests.exceptions.RequestException: If the download of the
            companion resource fails or exceeds the timeout.
        AttributeError, TypeError, KeyError, ValueError: If a status entry or
            its matching facility entry lacks an expected element/attribute
            or carries a non-numeric value.
    """
    soup = BeautifulSoup(html, "html.parser")

    response = requests.get(_FACILITY_INFO_URL, timeout=_REQUEST_TIMEOUT_SECONDS)
    geo = BeautifulSoup(response.text, "html.parser")

    # Keep only the part before the first "." (presumably dropping fractional
    # seconds); the value is passed through as-is, without convert_date.
    last_updated = soup.find_all("publicationtime")[0].text.split(".")[0]

    data = {
        "last_updated": last_updated,
        "lots": [],
    }

    for status in soup.select("parkingfacilitytablestatuspublication > parkingfacilitystatus"):
        lot_id = status.find("parkingfacilityreference")["id"]
        lot_total = int(status.find("totalparkingcapacityshorttermoverride").text)
        occupied = int(status.find("totalnumberofoccupiedparkingspaces").text)
        # Clamp at zero in case the feed reports more occupied spaces than capacity.
        lot_free = max(lot_total - occupied, 0)

        # The state may only be "open", "closed" or "nodata"; anything else
        # (or a missing nested status element) is mapped to "nodata".
        state_node = status.find("parkingfacilitystatus")
        if state_node is not None and state_node.text in ("open", "closed"):
            state = state_node.text
        else:
            state = "nodata"

        # No matching entry in the companion resource means there is no name
        # or position for this lot, so it is left out.
        facility = geo.find(id=lot_id)
        if facility is None:
            continue

        coords_node = facility.find("pointcoordinates")
        data["lots"].append({
            "name": facility.find("parkingfacilitydescription").text,
            "free": lot_free,
            "total": lot_total,
            "address": "none",
            "coords": {
                "lng": float(coords_node.find("longitude").text),
                "lat": float(coords_node.find("latitude").text),
            },
            "state": state,
            "lot_type": "none",
            "id": lot_id,
            "forecast": False,
        })

    return data