#!/usr/bin/env python
## Network Programming, Example program similar to project 3
## HTML Parser - get the quick and detailed weather forecast from
## forecast.weather.gov
##
## Output is written with single-argument print(...) calls, which behave the
## same under the Python 2 print statement and the Python 3 print function.

import re
import sys

try:  # Python 2 module layout
    from urllib import urlencode
    from urllib2 import Request, urlopen
except ImportError:  # Python 3 module layout
    from urllib.parse import urlencode
    from urllib.request import Request, urlopen


def wordWrap(s, n):
    """Wrap *s* at single spaces so that lines are at most *n* characters.

    A string that already fits in *n* characters is returned unchanged.
    Words are never split, so one word longer than *n* still gets a line of
    its own that exceeds the limit.  The result has no leading separator.
    """
    if len(s) <= n:
        return s
    lines = []
    line = []   # words collected for the line being built
    width = 0   # length of " ".join(line)
    for word in s.split(" "):
        # The separating space counts toward the width of the line.
        needed = width + 1 + len(word) if line else len(word)
        if line and needed > n:
            lines.append(" ".join(line))
            line = [word]
            width = len(word)
        else:
            line.append(word)
            width = needed
    lines.append(" ".join(line))
    return "\n".join(lines)


class mytable(object):
    """Collects the rows, cells and text of one HTML table.

    The token walker calls trTag/trEnd and tdTag/tdEnd as the matching tags
    are seen and addData for character data.  ``rows`` is a list of rows,
    each row a list of cells, each cell a list of text fragments.
    """

    def __init__(self):
        self.nrows = 0   # number of rows started so far
        self.cols = 0    # widest row seen so far, in cells
        self.col = 0     # index of the current cell within the current row
        self.row = -1    # index of the current row; -1 until the first <tr>
        self.rows = []
        self.txt = ''    # all text of the table, used to classify it

    def trTag(self):
        """Start a new row."""
        self.row = self.nrows
        self.nrows = self.nrows + 1
        self.rows.append([])

    def trEnd(self):
        """End the current row; the next cell is column 0 again."""
        self.col = 0

    def tdTag(self):
        """Start a new cell in the current row."""
        if not self.rows:
            # A cell with no enclosing row: open the row implicitly.
            self.trTag()
        if self.col == self.cols:
            self.cols = self.cols + 1
        self.rows[self.row].append([])

    def tdEnd(self):
        """End the current cell."""
        self.col = self.col + 1

    def addData(self, data):
        """Record character data belonging to this table.

        The text always counts toward ``txt``.  It is stored in the current
        cell only when such a cell exists; text that appears outside any
        <td> (for example in a <th>) has no cell to go to and is not stored.
        """
        self.txt += data
        if self.rows and self.col < len(self.rows[self.row]):
            self.rows[self.row][self.col].append(data)

    def printTable(self):
        """Print the table if it looks like a forecast; otherwise do nothing.

        Any forecast has the word 'day' in it; the detailed forecast also
        contains 'Hazardous', the quick one does not.
        """
        if 'day' not in self.txt:
            return
        if 'Hazardous' not in self.txt:
            self._printQuick()
        else:
            self._printDetailed()

    def _printQuick(self):
        """Print every non-empty cell as a 'period:' line plus its text."""
        for row in self.rows:
            for cell in row:
                if not cell:
                    continue
                # "<weekday> Night" periods span two fragments.  The length
                # check keeps a one-fragment cell from raising IndexError.
                if len(cell) > 1 and cell[1] == 'Night':
                    print("%s %s:" % (cell[0], cell[1]))
                    forecast = ' '.join(cell[2:])
                else:
                    print("%s:" % cell[0])
                    forecast = ' '.join(cell[1:])
                # Pull the degree sign tight against its neighbours.
                forecast = re.sub(u' \xb0 ', u'\xb0', forecast)
                print(forecast)
                sys.stdout.write('\n')
        sys.stdout.write('\n')

    def _printDetailed(self):
        """Print the first cell's fragments as period/forecast pairs."""
        firstRow = self.rows[0] if self.rows else []
        # The first two fragments of the cell are not part of the forecast.
        forecast = firstRow[0][2:] if firstRow else []
        print("Detailed Forecast:")
        # Normally, forecast has an even length, alternating the time period
        # and its forecast text.  For special advisories the length is odd:
        # the advisory comes first and is printed on its own.
        lenfor = len(forecast)
        if lenfor % 2:
            print(forecast[0])
        for i in range(lenfor % 2, lenfor - 1, 2):
            print(forecast[i])
            print("%s\n" % wordWrap(forecast[i + 1], 60))


zipcode = '67401'
url = 'http://forecast.weather.gov/zipcity.php'

# maintags and listtags are not referenced by the walker below.
maintags = [u'html', u'head', u'body', u'title']
tabletags = [u'table', u'tr', u'td']
listtags = [u'ul', u'li', u'ol']
# Tags that are skipped outright, so text inside them flows into the cell.
passtags = [u'a', u'h1', u'h2', u'h3', u'h4', u'em', u'strong', u'br',
            u'img', u'dl', u'dt', u'dd']


def walkTokens(stream):
    """Feed a tree-walker token stream into mytable objects.

    *stream* yields dicts with a 'type' key and, for tags, a 'name' key;
    'Characters' tokens carry their text under 'data'.  Each table is
    printed with printTable() when its end tag is reached.
    """
    tables = []  # stack of open tables; the innermost one is last
    for token in stream:
        tName = token.get('name')
        if tName in passtags:
            continue
        tType = token['type']

        if tType == 'StartTag':
            if tName == u'table':
                tables.append(mytable())
            elif tables and tName == u'tr':
                tables[-1].trTag()
            elif tables and tName == u'td':
                tables[-1].tdTag()
        elif tType == 'EndTag':
            if not tables:
                continue
            if tName == u'table':
                # Pop the finished table so that following tokens go to the
                # enclosing table, or to none when this was the outermost.
                tables.pop().printTable()
            elif tName == u'tr':
                tables[-1].trEnd()
            elif tName == u'td':
                tables[-1].tdEnd()
        elif tType == 'Characters':
            if tables:
                tables[-1].addData(token['data'])


def main():
    """POST the zip code to the forecast site and print its forecasts."""
    # Imported here so that the helpers above stay importable on a machine
    # that does not have the third-party html5lib package installed.
    import html5lib
    from html5lib import treebuilders, treewalkers

    # Python 3 requires the POST body as bytes; on Python 2 this is a no-op.
    data = urlencode([('inputstring', zipcode)]).encode('ascii')
    req = Request(url)
    p = html5lib.HTMLParser(tree=treebuilders.getTreeBuilder("dom"))
    response = urlopen(req, data)
    try:
        dom_tree = p.parse(response.read())
    finally:
        response.close()
    walker = treewalkers.getTreeWalker("dom")
    walkTokens(walker(dom_tree))


if __name__ == "__main__":
    main()