From 40a6f80d2be8b8785a15eb55cfc3d8194b19fbf3 Mon Sep 17 00:00:00 2001 From: Elijah Sawyers Date: Mon, 21 Oct 2019 15:21:38 -0500 Subject: [PATCH] print post text and print code block cleanup --- autostack/so_web_scraper/__init__.py | 80 ++++++++++++++++------------ 1 file changed, 47 insertions(+), 33 deletions(-) diff --git a/autostack/so_web_scraper/__init__.py b/autostack/so_web_scraper/__init__.py index 05d72a2..8416b19 100644 --- a/autostack/so_web_scraper/__init__.py +++ b/autostack/so_web_scraper/__init__.py @@ -218,30 +218,35 @@ def print_post_text(post_text): 'post-text.' ''' + element_colors = { + 'h1': 'white', + 'h2': 'white', + 'h3': 'white', + 'p': 'white', + 'blockquote': 'yellow', + } + for element in post_text: - if ( - element.name == 'h1' or - element.name == 'h2' or - element.name == 'h3' - ): # Headers. - print( - colored(element.text, 'white', attrs=['bold']) - ) - elif element.name == 'p': # Text. - print( - colored(element.text, 'white') - ) - elif element.name == 'blockquote': # Quotes. + try: print( - colored(' ' + element.text, 'yellow') + colored(element.text, element_colors[element.name], attrs=['bold']) ) - elif element.name == 'ul': # Lists. - for item in element.find_all('li'): - print( - colored(' - ' + item.text, 'green', attrs=['bold']) - ) - elif element.name == 'pre': # Code. - print_code_block(element.find('code')) + except KeyError: + if element.name == 'ul': # Lists. + print_ul(element) + elif element.name == 'pre': # Code. + print_code_block(element.find('code')) + + +def print_ul(ul): + ''' + TODO: Write docstring. + ''' + + for item in ul.find_all('li'): + print( + colored(' - ' + item.text, 'green', attrs=['bold']) + ) def print_code_block(code_block): @@ -261,7 +266,7 @@ def print_code_block(code_block): 'code' element from a Stack Overflow post. ''' - color_dict = { + token_colors = { 'Token.Keyword': 'blue', 'Token.Name.Builtin.Pseudo': 'blue', 'Token.Literal.Number.Integer': 'green', @@ -276,22 +281,13 @@ def print_code_block(code_block): print('') # Store the code's text. - code = '' - - # Loop through code spans. - for token in code_block: - # Catch when spans are wrapped with other tags. - try: - code += token - except TypeError: - for nestedToken in token.contents: - code += nestedToken + code = get_code_text(code_block) # Loop over code, and highlight. for token, content in pygments.lex(code, PythonLexer()): try: print( - colored(content, color_dict[str(token)]), + colored(content, token_colors[str(token)]), end='' ) except KeyError: @@ -301,3 +297,21 @@ def print_code_block(code_block): ) print('') + +def get_code_text(code_block): + ''' + TODO: Write docstring. + ''' + + code = '' + + # Loop through code spans. + for token in code_block: + # Catch when spans are wrapped with other tags. + try: + code += token + except TypeError: + for nestedToken in token.contents: + code += nestedToken + + return code