import html5lib, sys
#from html5lib import treebuilders, treewalkers, serializer
#from html5lib.filters import sanitizer
from xml.dom import Node
def scanNode(node, level=0):
    """Print an indented outline of a DOM subtree, one line per node.

    Each line shows the node's class name and, for element nodes, its tag
    name. Children are printed below their parent, indented by four more
    spaces per nesting level.

    Args:
        node: DOM node to describe. It must expose ``nodeType``,
            ``hasChildNodes()`` and ``childNodes`` (plus ``tagName`` when
            it is an element node).
        level: Nesting depth of ``node``; controls the indentation.
    """
    msg = node.__class__.__name__
    if node.nodeType == Node.ELEMENT_NODE:
        msg += ", tag: " + node.tagName
    # A single parenthesised argument runs on both Python 2 and 3; the
    # explicit " " keeps the separator that `print a, b` used to insert.
    print(" " * (level * 4) + " " + msg)
    # hasChildNodes is a method: it has to be called, because the bare
    # bound method is always truthy.
    if node.hasChildNodes():
        for child in node.childNodes:
            scanNode(child, level + 1)
# Parse index.html into a DOM tree and print an outline of it.
# getTreeBuilder is taken from the html5lib package itself: the separate
# `treebuilders` import is commented out at the top of this file, so the bare
# name `treebuilders` would raise NameError.
p = html5lib.HTMLParser(tree=html5lib.getTreeBuilder("dom"))
# Binary mode hands the parser raw bytes so it can detect the document
# encoding itself; `with` closes the file even if parsing raises.
with open("index.html", "rb") as f:
    dom_tree = p.parse(f)
scanNode(dom_tree)