"""Parse an HTML file with html5lib and print an indented outline of its DOM."""

import sys
from xml.dom import Node


def scanNode(node, level=0):
    """Print ``node`` and, recursively, every descendant, one node per line.

    Each line holds the node's class name, followed by ``", tag: <name>"``
    for element nodes, indented by four spaces per nesting level (plus the
    single separator space the original two-argument ``print`` emitted).

    Args:
        node: A DOM node exposing ``nodeType``, ``childNodes`` and
            ``hasChildNodes()``; element nodes must also expose ``tagName``.
        level: Nesting depth of ``node``; controls the indentation.
    """
    msg = node.__class__.__name__
    if node.nodeType == Node.ELEMENT_NODE:
        msg += ", tag: " + node.tagName
    # One pre-formatted argument keeps the output byte-identical under both
    # the Python 2 print statement and the Python 3 print function.
    print("%s %s" % (" " * (level * 4), msg))
    # hasChildNodes is a method: without the call the bound method object is
    # always truthy and the check never filters anything.
    if node.hasChildNodes():
        for child in node.childNodes:
            scanNode(child, level + 1)


def main(path="index.html"):
    """Parse the HTML file at ``path`` into a DOM tree and print its outline.

    Args:
        path: File to parse; defaults to ``index.html`` in the current
            working directory.
    """
    # Imported here so scanNode stays importable without html5lib installed.
    # The treebuilders import was previously commented out, which made the
    # parser construction below fail with a NameError.
    import html5lib
    from html5lib import treebuilders

    parser = html5lib.HTMLParser(tree=treebuilders.getTreeBuilder("dom"))
    # Binary mode hands the parser the raw bytes (html5lib's documented usage
    # for files); the with-block closes the file even if parsing raises.
    with open(path, "rb") as f:
        dom_tree = parser.parse(f)
    scanNode(dom_tree)


if __name__ == "__main__":
    main()