Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

print post text and print code block cleanup #23

Merged
merged 1 commit into from
Oct 21, 2019
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
80 changes: 47 additions & 33 deletions autostack/so_web_scraper/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,30 +218,35 @@ def print_post_text(post_text):
'post-text.'
'''

element_colors = {
'h1': 'white',
'h2': 'white',
'h3': 'white',
'p': 'white',
'blockquote': 'yellow',
}

for element in post_text:
if (
element.name == 'h1' or
element.name == 'h2' or
element.name == 'h3'
): # Headers.
print(
colored(element.text, 'white', attrs=['bold'])
)
elif element.name == 'p': # Text.
print(
colored(element.text, 'white')
)
elif element.name == 'blockquote': # Quotes.
try:
print(
colored(' ' + element.text, 'yellow')
colored(element.text, element_colors[element.name], attrs=['bold'])
)
elif element.name == 'ul': # Lists.
for item in element.find_all('li'):
print(
colored(' - ' + item.text, 'green', attrs=['bold'])
)
elif element.name == 'pre': # Code.
print_code_block(element.find('code'))
except KeyError:
if element.name == 'ul': # Lists.
print_ul(element)
elif element.name == 'pre': # Code.
print_code_block(element.find('code'))


def print_ul(ul):
    '''
    Prints every 'li' item found in a 'ul' element, one per line.

    Each line is the item's text prefixed with ' - ', passed through
    colored() with 'green' and the 'bold' attribute.

    ul - The 'ul' element to print. Only its find_all() method is used
         here (presumably a BeautifulSoup tag; confirm against caller).
    '''

    # Built lazily, so items are still formatted and printed one at a time.
    bullets = (' - ' + entry.text for entry in ul.find_all('li'))

    for bullet in bullets:
        print(colored(bullet, 'green', attrs=['bold']))


def print_code_block(code_block):
Expand All @@ -261,7 +266,7 @@ def print_code_block(code_block):
'code' element from a Stack Overflow post.
'''

color_dict = {
token_colors = {
'Token.Keyword': 'blue',
'Token.Name.Builtin.Pseudo': 'blue',
'Token.Literal.Number.Integer': 'green',
Expand All @@ -276,22 +281,13 @@ def print_code_block(code_block):
print('')

# Store the code's text.
code = ''

# Loop through code spans.
for token in code_block:
# Catch when spans are wrapped with other tags.
try:
code += token
except TypeError:
for nestedToken in token.contents:
code += nestedToken
code = get_code_text(code_block)

# Loop over code, and highlight.
for token, content in pygments.lex(code, PythonLexer()):
try:
print(
colored(content, color_dict[str(token)]),
colored(content, token_colors[str(token)]),
end=''
)
except KeyError:
Expand All @@ -301,3 +297,21 @@ def print_code_block(code_block):
)

print('')

def get_code_text(code_block):
    '''
    Returns the text held in a code block as a single string.

    String pieces are taken as they are. Any other piece is expected to
    expose its children through a 'contents' attribute, and is flattened
    recursively so text wrapped in several levels of tags is still
    collected in document order.

    code_block - Iterable of string pieces and/or nested elements
                 (presumably a BeautifulSoup 'code' tag; confirm against
                 caller), or None.

    Returns '' when code_block is None or has no pieces.
    '''

    # print_post_text passes element.find('code') along, which can be None
    # when a 'pre' element has no 'code' child.
    if code_block is None:
        return ''

    pieces = []

    # Loop through code spans.
    for token in code_block:
        if isinstance(token, str):
            pieces.append(token)
        else:
            # Spans can be wrapped with other tags, to any depth.
            pieces.append(get_code_text(token.contents))

    # Join once, rather than growing a string piece by piece.
    return ''.join(pieces)