import html5lib, sys
from html5lib import treebuilders, treewalkers, serializer
from html5lib.filters import sanitizer
# Parse weather.html into a DOM tree, walk it as a token stream, and print
# every token except those whose 'name' is one of the tags in `passtags`.

p = html5lib.HTMLParser(tree=treebuilders.getTreeBuilder("dom"))

# The context manager closes the file even if parsing raises.
with open("weather.html", "r") as f:
    dom_tree = p.parse(f)

walker = treewalkers.getTreeWalker("dom")
stream = walker(dom_tree)

# Tag names whose tokens are skipped (not printed) by the loop below.
passtags = [
    u'a', u'h1', u'h2', u'h3', u'h4', u'em', u'strong',
    u'img', u'dl', u'dt', u'dd',
]

# NOTE(review): the original indentation was lost; the print is assumed to
# sit at loop level, so tokens without a 'name' key are printed as well.
# Confirm against the intended behavior.
for token in stream:
    # `in` replaces the deprecated dict.has_key(), which Python 3 removed.
    if 'name' in token and token['name'] in passtags:
        continue
    # Single-argument print(...) gives the same output as the print
    # statement on Python 2 and is also valid on Python 3.
    print(token)