Skip to content
This repository has been archived by the owner on Apr 16, 2022. It is now read-only.

Commit

Permalink
PEP-8 conformity and making DiscountsParser a sequence
Browse files Browse the repository at this point in the history
  • Loading branch information
rg3 committed Oct 13, 2011
1 parent 28dd519 commit 0d18fe1
Showing 1 changed file with 130 additions and 115 deletions.
245 changes: 130 additions & 115 deletions steam_discounts
Original file line number Diff line number Diff line change
Expand Up @@ -12,120 +12,135 @@ import sys
import urllib

class Entry(object):
    """One discounted item as extracted from the store page.

    All fields are unicode strings that start out empty and are filled in
    by appending text fragments while the page is parsed.
    """

    def __init__(self):
        self.title = u''        # Item name.
        self.orig_price = u''   # Price before the discount.
        self.discount = u''     # Discount text as shown on the page.
        self.price = u''        # Price after the discount.
class DiscountsParser(HTMLParser.HTMLParser):
    """HTML parser that collects discounted items as Entry objects.

    Feed it the markup of the discounts listing. A new entry is started at
    every <h4> tag (title); the text of <div class="tab_discount
    discount_pct"> becomes the discount, the text of <strike> the original
    price and the remaining text of <div class="tab_price"> the final
    price. The entry is stored when the "tab_price" <div> is closed.

    After feeding, the parser behaves like a read-only sequence of entries:
    len(), indexing, iteration, reversed() and ``in`` are supported.
    """

    def __init__(self):
        # The base constructor calls reset(), which sets up the parse state.
        HTMLParser.HTMLParser.__init__(self)

    @staticmethod
    def entity2uni(name):
        """Return the character for a named entity such as "amp".

        Raises KeyError if the entity name is unknown.
        """
        return unichr(htmlentitydefs.name2codepoint[name])

    @staticmethod
    def ref2uni(ref):
        """Return the character for a numeric character reference.

        ref is the reference body without the surrounding "&#" and ";":
        either decimal digits ("62") or hexadecimal digits prefixed with
        "x" or "X" ("x3E").

        Raises ValueError if ref is not a valid number.
        """
        if ref[:1] in ('x', 'X'):
            return unichr(int(ref[1:], 16))
        return unichr(int(ref))

    def reset(self):
        """Reset the parser and discard every collected entry."""
        HTMLParser.HTMLParser.reset(self)

        self.current_entry_ = None
        self.entries_ = []

        self.in_h4_ = False
        self.in_discount_pct_ = False
        self.in_tab_price_ = False
        self.in_strike_ = False

    def handle_starttag(self, tag, attrs):
        """Track which field, if any, the following text belongs to."""
        attrs_map = dict(attrs)

        if tag == 'h4':
            # First field to extract, hence new entry.
            self.in_h4_ = True
            self.current_entry_ = Entry()

        elif tag == 'div':
            css_class = attrs_map.get('class', '')
            if css_class == 'tab_discount discount_pct':
                self.in_discount_pct_ = True
            elif css_class == 'tab_price':
                self.in_tab_price_ = True

        elif tag == 'strike':
            self.in_strike_ = True

    def handle_endtag(self, tag):
        """Leave the current field; store the entry after its last field."""
        if tag == 'h4':
            self.in_h4_ = False

        elif tag == 'div':
            if self.in_discount_pct_:
                self.in_discount_pct_ = False
            elif self.in_tab_price_:
                self.in_tab_price_ = False
                # This was the last field to extract. Skip the store when
                # no <h4> has been seen yet, so None never gets collected.
                if self.current_entry_ is not None:
                    self.entries_.append(self.current_entry_)

        elif tag == 'strike':
            self.in_strike_ = False

    def append_text(self, text):
        """Append text to the field of the current entry being parsed."""
        if self.current_entry_ is None:
            # Text found before the first <h4>: there is nothing to fill.
            return

        if self.in_h4_:
            self.current_entry_.title += text
        elif self.in_discount_pct_:
            self.current_entry_.discount += text
        elif self.in_strike_:
            self.current_entry_.orig_price += text
        elif self.in_tab_price_:
            # Note we only enter here if not in <strike>.
            self.current_entry_.price += text

    def handle_data(self, data):
        """Append a stripped run of text, decoding byte strings as UTF-8."""
        text = data.strip()
        if isinstance(text, bytes):
            # Only raw bytes need decoding; already-decoded text is kept.
            text = text.decode('utf-8')
        self.append_text(text)

    def handle_entityref(self, name):
        """Append the character for "&name;", or the literal if unknown."""
        try:
            text = self.entity2uni(name)
        except KeyError:
            text = u'&%s;' % name
        self.append_text(text)

    def handle_charref(self, ref):
        """Append the character for "&#ref;", or the literal if invalid."""
        try:
            text = self.ref2uni(ref)
        except (ValueError, OverflowError):
            text = u'&#%s;' % ref
        self.append_text(text)

    def get_entries(self):
        """Return a copy of the list of collected entries."""
        return self.entries_[:]

    # Behave like a sequence of Entries.
    def __len__(self):
        return len(self.entries_)

    def __getitem__(self, key):
        return self.entries_[key]

    def __iter__(self):
        return iter(self.entries_)

    def __reversed__(self):
        return reversed(self.entries_)

    def __contains__(self, item):
        return item in self.entries_

# Entry point: fetch the current discounts and show them in a pager.
if __name__ == '__main__':
    # Find the number of discounts first.
    conn = urllib.urlopen('http://store.steampowered.com/')
    try:
        page = conn.read()
    finally:
        # Release the connection even if reading fails.
        conn.close()

    mo = re.search(r"javascript:PageTab\('Discounts', *\d+, *(\d+)", page)
    if mo is None:
        sys.exit('FATAL: unable to find the number of discounts')
    total_discounts = int(mo.group(1))

    # Retrieve the discounts, all of them in a single request.
    conn = urllib.urlopen(
        'http://store.steampowered.com/search/tab?bHoverEnabled=true&style=' +
        '&navcontext=1_4_4_&tab=Discounts&start=0&count=%d' % total_discounts)
    try:
        page = conn.read()
    finally:
        conn.close()

    discounts_parser = DiscountsParser()
    discounts_parser.feed(page)

    # Print them on screen, through a pager.
    title_width = 40
    child = subprocess.Popen(['less'], stdin=subprocess.PIPE)
    try:
        for entry in discounts_parser:
            line = u'%s ...%s %7s [%7s %s]' % (
                entry.title[:title_width],
                # Dot padding keeps the price column aligned for short titles.
                '.' * (title_width - len(entry.title)),
                entry.price,
                entry.orig_price,
                entry.discount,
            )
            child.stdin.write(line.encode('utf-8') + b'\n')
    finally:
        # Always close the pipe and wait, so the pager is never left behind.
        child.stdin.close()
        child.wait()

0 comments on commit 0d18fe1

Please sign in to comment.