Skip to content

Commit

Permalink
Building web scraping functions
Browse files Browse the repository at this point in the history
  • Loading branch information
BenOSanders committed Mar 23, 2019
1 parent a6cb9fa commit 8d1a414
Show file tree
Hide file tree
Showing 2 changed files with 75 additions and 0 deletions.
3 changes: 3 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"python.pythonPath": "venv/bin/python3"
}
72 changes: 72 additions & 0 deletions SOQuery/SOQuery.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
from urllib.parse import quote_plus, urljoin

import requests
from bs4 import BeautifulSoup
#from stackapi import StackAPI


class WebScraper:
    """Helpers for scraping Stack Overflow search results and question pages.

    Every helper is a static method, so it can be called on the class
    (``WebScraper.scrape_so(...)``) or on an instance.
    """

    # Site root; question links taken from search results are resolved
    # against it.
    BASE_URL = "https://stackoverflow.com/"
    # Search endpoint; the encoded query string is appended to it.
    SEARCH_URL = "https://stackoverflow.com/search?q="
    # Seconds to wait on the server before a request is abandoned. Without a
    # timeout, requests may block indefinitely.
    REQUEST_TIMEOUT = 10

    @staticmethod
    def _build_search_url(searchStrings):
        """Return the search URL for the given terms, percent-encoding each one."""
        if isinstance(searchStrings, str):
            # A bare string would otherwise be iterated character by character.
            searchStrings = searchStrings.split()
        # Encode each term so characters such as "+", "&" or "#" stay part of
        # the query instead of being read as URL syntax.
        query = "+".join(quote_plus(str(term)) for term in searchStrings)
        return WebScraper.SEARCH_URL + query

    @staticmethod
    def _fetch_soup(url):
        """Download ``url`` and return its HTML parsed with the lxml parser."""
        response = requests.get(url, timeout=WebScraper.REQUEST_TIMEOUT)
        # Fail loudly on 4xx/5xx instead of parsing an error page.
        response.raise_for_status()
        return BeautifulSoup(response.text, 'lxml')

    @staticmethod
    def scrape_so(searchStrings):
        """Return the parsed HTML of the search page for ``searchStrings``.

        Args:
            searchStrings: Iterable of search terms, or a single string of
                whitespace-separated terms. An empty iterable produces an
                empty query.

        Returns:
            A ``BeautifulSoup`` document of the search results page.

        Raises:
            requests.RequestException: If the request fails, times out, or
                the server answers with an error status.
        """
        return WebScraper._fetch_soup(WebScraper._build_search_url(searchStrings))

    @staticmethod
    def scrape_question(url):
        """Return the parsed HTML of the question page at ``url``.

        Args:
            url: Link to the question, either relative to the site root
                (with or without a leading slash) or absolute.

        Returns:
            A ``BeautifulSoup`` document of the question page.

        Raises:
            ValueError: If ``url`` is empty or ``None``.
            requests.RequestException: If the request fails, times out, or
                the server answers with an error status.
        """
        if not url:
            raise ValueError("url must be a non-empty question link")
        # urljoin avoids the doubled slash that plain concatenation produces
        # when the link already starts with "/".
        return WebScraper._fetch_soup(urljoin(WebScraper.BASE_URL, url))

    @staticmethod
    def get_post_url(soup):
        """Return the href of the question link in the ``data-position="1"`` div.

        Args:
            soup: Parsed search results page, as returned by ``scrape_so``.

        Returns:
            The ``href`` of the ``a.question-hyperlink`` element inside the
            div whose ``data-position`` is ``"1"`` (presumably the first
            search hit; confirm against the current page markup), or ``None``
            when either element is missing.
        """
        first_result = soup.find("div", {"data-position": "1"})
        if first_result is None:
            return None
        link = first_result.find("a", {"class": "question-hyperlink"})
        if link is None:
            return None
        return link.get("href")

    @staticmethod
    def get_answer(soup):
        """Print the paragraphs and blockquotes of the accepted answer.

        Args:
            soup: Parsed question page, as returned by ``scrape_question``.

        Prints the list of ``<p>`` tags and then the list of ``<blockquote>``
        tags found inside the div marked ``itemprop="acceptedAnswer"``. When
        the page has no such div, prints a short notice instead.
        """
        accepted = soup.find("div", {"itemprop": "acceptedAnswer"})
        if accepted is None:
            print("No accepted answer found.")
            return
        print(accepted.find_all("p"))
        print(accepted.find_all("blockquote"))



def main():
    """Search Stack Overflow for a sample query and print the accepted answer.

    Fetches the search page for the sample terms, takes the question link
    reported by ``WebScraper.get_post_url``, downloads that question, and
    hands the page to ``WebScraper.get_answer``.
    """
    search_terms = ["bad", "request", "error", "flask"]
    results_page = WebScraper.scrape_so(search_terms)
    question_link = WebScraper.get_post_url(results_page)

    question_page = WebScraper.scrape_question(question_link)
    WebScraper.get_answer(question_page)


if __name__ == "__main__":
    main()

0 comments on commit 8d1a414

Please sign in to comment.