#!/usr/bin/env python
"""
**File:** wxWeather.py

**Author:** Tim Bower, Copyright 2009, Open Source Apache License -- See
non-docstring source code comments for license details.

**HTML parser** - extract the current conditions and 5-day forecast from
`www.wunderground.com <http://www.wunderground.com>`_ and display it in a
simple wxPython graphical window.  A component of Project 3 for Network
Programming Class.
"""

# Copyright 2009 Tim Bower
# This program was developed for education purposes for the Network
# Programming Class, CMST 355, at Kansas State University at Salina.
#
# This program is licensed as Open Source Software using the Apache License,
# Version 2.0 (the "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# You are free to use, copy, distribute, and display the work, and to make
# derivative works. If you do, you must give the original author credit. The
# author specifically permits (and encourages) teachers to post, reproduce,
# and distribute some or all of this material for use in their classes or by
# their students.

import sys
import re
import time
import urllib
import urllib2

import wx
import wx.html
from html5lib import HTMLParser, treebuilders, treewalkers

# NOTE(review): the HTML markup inside the string literals of this file
# (table/row/cell tags, heading tags, the "&deg;" entity) was reconstructed
# from the surviving text and format specifiers.  Confirm the exact tags and
# attributes against a pristine copy of the file if one is available.

# Matches a degree sign that ended up surrounded by spaces after the cell
# fragments were joined with ' '.  Defined at module level so the parsing
# functions also work when this file is imported instead of run as a script.
deg = re.compile(' \xb0 ')

# Set by get5day() once the forecast table was emitted; reset by getWeather().
fiveDayShown = False


class WeatherPanel(wx.Panel):
    """
    The main GUI class for *wxWeather* -- no real reason why this code is at
    the panel level instead of the frame level with the panel being just one
    of the objects in the frame.  I started with *wxHTML*, which does it this
    way.

    class *WeatherPanel* inherits :class:`wx.Panel` and adds a
    :class:`wx.html.HtmlWindow` to display HTML content.  The HTML content
    comes from :func:`getWeather`, which gets data from
    `wunderground <http://www.wunderground.com>`_ and reformats it.
    """

    def __init__(self, parent, id):
        """
        :param parent: the parent window (the frame)
        :param id: window identifier passed to wx (-1 for the default)
        """
        # default pos is (0, 0) and size is (-1, -1) which fills the frame
        wx.Panel.__init__(self, parent, id)
        self.SetBackgroundColour("maroon")
        # Give the panel a location up front so that "Reload Forecast" works
        # even if load_weather() was never called with a valid zip code.
        self.zipcode = '67401'
        self.html1 = wx.html.HtmlWindow(self, id, pos=(0, 30),
                                        size=(610, 370))
        self.btn1 = wx.Button(self, -1, "Reload Forecast", pos=(0, 0))
        self.btn1.Bind(wx.EVT_BUTTON, self.Onreload)
        self.btn2 = wx.Button(self, -1, "Change Forecast Location",
                              pos=(200, 0))
        self.btn2.Bind(wx.EVT_BUTTON, self.OnNewZip)

    def Onreload(self, event):
        "Call back from *Reload Forecast* button"
        self.load_weather()

    def OnNewZip(self, event):
        "Call back from *Change Forecast Location* button"
        dlg = wx.TextEntryDialog(self, "Enter a Zip Code",
                                 "New Forecast Location",
                                 style=wx.OK | wx.CANCEL)
        try:
            if dlg.ShowModal() == wx.ID_OK:
                self.load_weather(dlg.GetValue())
        finally:
            # Destroy the dialog even if loading the forecast failed.
            dlg.Destroy()

    def load_weather(self, zipcode=None):
        """
        Validate zip code and call :func:`getWeather`

        :param zipcode: 5 digit zip code string; when omitted (or empty) the
            previously accepted zip code is reloaded.
        """
        if zipcode:
            if len(zipcode) == 5 and zipcode.isdigit():
                self.zipcode = zipcode
            else:
                dlg = wx.MessageDialog(
                    self, "The Zip Code must consist of 5 digits",
                    "Invalid Zip Code", style=wx.OK)
                try:
                    dlg.ShowModal()
                finally:
                    dlg.Destroy()
                return
        wx.BeginBusyCursor()
        try:
            self.html1.SetPage(getWeather(self.zipcode))
        finally:
            # Always restore the cursor, also when the download or the
            # parsing raised.
            wx.EndBusyCursor()

    def OnExit(self, event):
        """
        Call back from clicking the close window (X) icon on the frame.
        Close the application by Destroying the object
        """
        self.Destroy()


def wordWrap(s, n):
    """
    Wrap lines that are too long by inserting line feeds between words.

    :param s: the text to wrap
    :param n: the maximum line width
    :rtype: *s* unchanged when it already fits, otherwise the words of *s*
        re-joined with single spaces and ``\\n`` so that no line is wider
        than *n* (a single word longer than *n* is kept whole).
    """
    ## This code could probably be done away with, if the textwrap module
    ## were used.
    if len(s) <= n:
        return s
    lines = []
    current = []
    width = 0
    for word in s.split(" "):
        # "+ 1" accounts for the space that joins the word to the line.
        if current and width + 1 + len(word) > n:
            lines.append(" ".join(current))
            current = [word]
            width = len(word)
        else:
            if current:
                width += 1
            width += len(word)
            current.append(word)
    lines.append(" ".join(current))
    return "\n".join(lines)


def testGet(x, i):
    """A List function: return x[i] if it exists, else empty string"""
    if i < len(x):
        return x[i]
    return ''


class mytable(object):
    """
    As the HTML is parsed, any tables get stored here.
    This just helps to organize and later retrieve the data.
    """

    def __init__(self):
        self.nrows = 0      # number of rows stored so far
        self.row = 0        # index of the row being filled
        self.cols = 0       # widest row seen so far (number of cells)
        self.col = -1       # index of the cell being filled, -1 = none yet
        self.tbl = []       # rows -> cells -> list of text fragments
        self.txt = ''       # all text seen inside the table, concatenated
        self.in_tr = False
        self.in_td = False

    def trTag(self):
        "A tr start-tag (new row)"
        self.row = self.nrows
        self.nrows += 1
        self.tbl.append([])
        self.in_tr = True

    def trEnd(self):
        "A tr end-tag (end of row)"
        self.col = -1
        self.in_tr = False

    def tdTag(self):
        "A td start-tag (new column)"
        self.col += 1
        if self.col == self.cols:
            self.cols += 1
        self.tbl[self.row].append([])
        self.in_td = True

    def tdEnd(self):
        "A td end-tag (end of column)"
        self.in_td = False

    def addData(self, data):
        """
        :type data: string

        New data to add to the current table cell.  Items are stored in a
        list.  Thus, a 2 dimensional HTML table is stored as a Python
        3-dimensional list.
        """
        self.txt += data
        if self.in_td and self.in_tr:
            self.tbl[self.row][self.col].append(data)

    def txtMatch(self, st):
        """Boolean: Determine if table data holds a desired string"""
        return re.search(st, self.txt) is not None

    def txtLen(self):
        """
        Get the length of the table -- useful to guess which table we
        are working with.
        """
        return len(self.txt)

    def getTableData(self):
        """
        :rtype: a 3-dimensional list of strings [[[] .. ] .. ]

        Returns the text data held in the table
        """
        return self.tbl

    def printTable(self):
        """
        A generic table printer, depending on the data, it may or may not
        get the job done, but could help with debugging.  In most cases, you
        will want to use :meth:`getTableData` and format the data per your
        needs.
        """
        for row in self.tbl:
            for col in row:
                for line in col:
                    print("%s\n" % wordWrap(line, 60))

    def xpose(self):
        """
        Transpose a table -- rows and columns switch.  Rows shorter than the
        widest row are padded with empty strings (see :func:`testGet`).
        """
        old_rows = self.tbl
        self.tbl = [[testGet(r, i) for r in old_rows]
                    for i in range(self.cols)]
        # Keep the bookkeeping in step with the new shape.
        self.nrows, self.cols = self.cols, len(old_rows)

    def getHTML(self):
        """
        Return the table data formatted as HTML.  Returns a list of strings.
        This is debugging code - not used in the final program.
        """
        # NOTE(review): reconstructed markup -- see note at top of file.
        html = [u'<table>']
        for row in self.tbl:
            html.append(u'<tr>')
            for col in row:
                line = deg.sub(u'&deg;', wordWrap(' '.join(col), 60))
                if len(line) > 0:
                    html.append(u'<td>' + line + u'</td>')
            html.append(u'</tr>')
        html.append(u'</table>')
        return html

## End of mytable Class


def getWUTable(tbl):
    """
    If the current table is what we want, return it as HTML formatted as we
    want it, otherwise return None.  This function will be the first to be
    modified whenever wunderground changes the format of the HTML.

    :param tbl: a :class:`mytable` holding one parsed HTML table
    """
    # Tables containing any of these words are never the forecast.
    for unwanted in ("State Extremes", "Quarter", "Blog", "Calendar",
                     "Statement"):
        if tbl.txtMatch(unwanted):
            return None

    ## Any forecast has the word 'day' in it
    if tbl.txtMatch("day"):
        # determining which table it is by length may be more reliable than
        # by content -- they keep changing the web page content
        if tbl.txtLen() < 500:
            tbl.xpose()
            return get5day(tbl.getTableData())
    return None


def get5day(data):
    """
    Quick 5 day forecast.

    :param data: the transposed table data, one entry per day
    :rtype: a list of HTML strings, or None when the forecast was already
        produced for this page or *data* holds nothing usable.
    """
    global fiveDayShown
    if fiveDayShown:
        return None     # already did it -- stop random printing
    if not data or not data[0]:
        return None     # nothing to show; let a later table have a try
    fiveDayShown = True

    # NOTE(review): reconstructed markup -- see note at top of file.
    html = [u'<center><h3>' + ' '.join(data[0][-1]) + u'</h3></center>',
            u'<table>']
    for day in data:
        # testGet() keeps short columns from raising IndexError.
        html.append(u'<tr><td>%s</td>' % ''.join(testGet(day, 0)))
        html.append(u'<td>%s</td>' %
                    deg.sub(u'&deg;', ' '.join(testGet(day, 1))))
        html.append(u'<td>%s</td></tr>' % ' '.join(testGet(day, 2)))
    html.append(u'</table>')
    return html


class Current_conditions(object):
    """
    Class to collect and report on the current weather conditions, which
    are spread out over multiple tables.
    """

    def __init__(self):
        self.currents = {}
        self.needed = (u'lu', u'tempf', u'humidity', u'windspeedmph',
                       u'winddir')
        self.pending = None             # variable awaiting its value
        self.data_is_pending = False
        self.need_values = True

    def set_currents(self, data):
        """
        :param data: a list of (variable, value) tuples.

        Collect current weather condition information from token data.
        """
        # NOTE(review): some html5lib releases appear to deliver attributes
        # as a dict keyed by (namespace, name) instead of a list of pairs;
        # both shapes are accepted here -- confirm against the installed
        # html5lib version.
        if isinstance(data, dict):
            pairs = data.items()
        else:
            pairs = data or []
        cur = {}
        for key, value in pairs:
            if isinstance(key, tuple):
                key = key[-1]
            cur[key] = value

        variable = cur.get(u'pwsvariable')
        if variable not in self.needed:
            return
        if u'value' in cur:
            self.currents[variable] = cur[u'value']
        else:
            # the data we want must be in next Characters token
            self.pending = variable
            self.data_is_pending = True

    def set_pending(self, data):
        """
        This is exactly why parsing HTML is ugly, messy work.  In some
        cases, the start tag processed in :meth:`set_currents` tips us off
        to look for a current condition value in the next character token
        that we come across.  The data in this token should be what we are
        looking for.
        """
        if self.data_is_pending:
            self.currents[self.pending] = data
            self.data_is_pending = False

    @property
    def needs_values(self):
        """
        A boolean to test to see if we are still looking for current
        condition data.
        """
        return len(self.currents) != len(self.needed)

    def get_html(self):
        """
        Return current conditions data with HTML formatting, as a list of
        strings.  Only the values collected so far are reported.
        """
        # NOTE(review): reconstructed markup -- see note at top of file.
        html = [u'<table>']
        # do this instead of using keys() to preserve order
        avail = [name for name in self.needed if name in self.currents]
        if u'lu' in avail:
            try:
                stamp = time.localtime(float(self.currents[u'lu']))
            except (TypeError, ValueError, OverflowError):
                # Not a usable timestamp -- leave the row out.
                stamp = None
            if stamp is not None:
                html.append(u'<tr><td>Updated: %s</td></tr>' %
                            time.strftime("%b %d, %Y, %I:%M %p", stamp))
        if u'tempf' in avail:
            html.append(u'<tr><td>Temperature: %s&deg;F</td></tr>' %
                        self.currents[u'tempf'])
        if u'humidity' in avail:
            html.append(u'<tr><td>Humidity: %s%%</td></tr>' %
                        self.currents[u'humidity'])
        if u'windspeedmph' in avail and u'winddir' in avail:
            html.append(u'<tr><td>Wind: %s mph from the %s</td></tr>' %
                        (self.currents[u'windspeedmph'],
                         self.currents[u'winddir']))
        elif u'windspeedmph' in avail:
            html.append(u'<tr><td>Wind: %s mph</td></tr>' %
                        self.currents[u'windspeedmph'])
        html.append(u'</table>')
        return html


def getWeather(zipcode='67401'):
    """
    :param zipcode: 5 digit zip code string (optional)
    :rtype: a string of simple HTML

    Grab a page from `wunderground <http://www.wunderground.com>`_ and
    parse it, strip out a quick summary and return the summary with simple
    HTML formatting.
    """
    global fiveDayShown
    fiveDayShown = False

    url = "http://www.wunderground.com/cgi-bin/findweather/getForecast"
    data = urllib.urlencode([('query', zipcode)])
    req = urllib2.Request(url)
    response = urllib2.urlopen(req, data)
    try:
        page = response.read()
    finally:
        response.close()

    p = HTMLParser(tree=treebuilders.getTreeBuilder("dom"))
    dom_tree = p.parse(page)
    walker = treewalkers.getTreeWalker("dom")
    stream = walker(dom_tree)

    maintags = [u'html', u'head', u'body', u'title']
    tabletags = [u'table', u'tr', u'td']
    listtags = [u'ul', u'li', u'ol']
    passtags = [u'a', u'h1', u'h2', u'h3', u'h4', u'em', u'strong', u'br',
                u'img', u'dl', u'dt', u'dd']

    currents = Current_conditions()
    got_currents = False
    doingTable = False
    doingTitle = False
    no_title = True
    title = u''
    tName = None
    tbl = None          # the innermost open table, if any
    tables = []         # A stack of tables for nested tables
    html_data = []

    for token in stream:
        if 'name' in token:
            if token['name'] in passtags:
                continue
            tName = token['name']
        tType = token['type']

        if tType == 'StartTag':
            if tName in tabletags:
                if tName == u'table':
                    tbl = mytable()
                    tables.append(tbl)
                    doingTable = True
                elif tbl is not None:
                    if tName == u'tr':
                        tbl.trTag()
                    else:
                        tbl.tdTag()
            elif tName in maintags:
                if tName == u'title':
                    doingTitle = True
                    no_title = True
                html_data.append(u'<' + tName + u'>')
            elif currents.needs_values:
                currents.set_currents(token['data'])
            continue

        if tType == 'EndTag':
            if tName in tabletags:
                if tName == u'table':
                    # Take the finished table off the stack and resume the
                    # enclosing one (if any), so nested tables are each
                    # examined exactly once.
                    if tables:
                        finished = tables.pop()
                        html_table = getWUTable(finished)
                        if html_table:
                            # add current conditions first
                            html_data.extend(currents.get_html())
                            html_data.extend(html_table)
                    if tables:
                        tbl = tables[-1]
                    else:
                        tbl = None
                        doingTable = False
                elif tbl is not None:
                    if tName == u'tr':
                        tbl.trEnd()
                    else:
                        tbl.tdEnd()
            elif tName in maintags:
                html_data.append(u'</' + tName + u'>')
                if tName == u'title':
                    doingTitle = False
                    # NOTE(review): heading markup reconstructed.
                    html_data.append(
                        u'<center><h1>%s</h1></center>' % title)
            continue

        if tType == 'Characters':
            if currents.data_is_pending:
                currents.set_pending(token['data'])
            if doingTable and tbl is not None:
                tbl.addData(token['data'])
            elif doingTitle and no_title:
                title = token['data']
                html_data.append(title)
                no_title = False

    return '\n'.join(html_data)


def main():
    "Create the application window, load the initial forecast and run."
    app = wx.App(False)
    # create a window/frame, no parent, -1 is default ID, title, size
    frame = wx.Frame(None, -1, "Weather Forecast", size=(610, 400))
    # call the derived class, -1 is default ID
    win = WeatherPanel(frame, -1)
    # show the frame
    frame.Show(True)
    # load initial weather data
    win.load_weather('67401')
    # start the event loop
    app.MainLoop()


if __name__ == '__main__':
    main()