• python的xml.dom学习笔记


    首先说明一下:这篇文章主要是我自己随性学习时写的笔记,读者看起来可能会比较乱,呵呵。给大家推荐一篇讲得更清晰一些的文章:http://www.cnblogs.com/xuxm2007/archive/2011/01/16/1936610.html 。另外,下面的示例代码使用的是 Python 2 的 print 语句写法。

    #coding=utf-8
    
    # Demo script for xml.dom.minidom (Python 2 syntax: print statements).
    # It does three things in order:
    #   1. parses a document and prints every <link> element, two different ways;
    #   2. dumps the DOM properties of the first <link> element;
    #   3. builds a new document and reorders text-node children of its root.

    # Parse all <link> tags in the XML file
    from xml.dom import minidom
    from xml.dom.minidom import getDOMImplementation
    # Hard-coded Windows path to the input document.
    # NOTE(review): minidom is an XML parser, so this presumably requires
    # hello.html to be well-formed XHTML -- confirm against the input file.
    doc=minidom.parse("d:\\hello.html")
    
    # Collect every <link> element in the document.
    nodes=doc.getElementsByTagName("link")
    
    # First way: iterate over the node list directly and re-print each element
    # as a tag.
    for node in nodes:
        # The trailing comma on each print suppresses the newline, so the
        # pieces end up on one line separated by single spaces.
        print "<",node.tagName,
        print "type=\"",node.getAttribute("type"),"\"",
        print "rel=\"",node.getAttribute("rel"),"\"",
        print "href=\"",node.getAttribute("href"),"\"",
        # No trailing comma here: this print ends the output line.
        print "/>"
        
    # The message printed below reads: "Get the link tags another way".
    print "通过另外一种方式获得link标签"
    # Second way: access the node list by index instead of iterating over it.
    linknodes=doc.getElementsByTagName("link")
    for i in range(len(linknodes)):
        print linknodes[i].getAttribute("type"),
        print linknodes[i].getAttribute("rel"),
        print linknodes[i].getAttribute("href")
        
    
    # Inspect a single node
    # Takes the first <link> element; indexing fails if the document has none.
    node=linknodes[0]
    # List every attribute and method available on the element object.
    print dir(node)
    # Tree position and identity properties of the element.
    print node.parentNode
    print node.prefix
    print node.nodeType,node.nodeValue,node.nodeName
    print node.localName
    print node.childNodes
    print node.firstChild,node.lastChild
    print node.attributes
    print node.namespaceURI
    print node.nextSibling
    # Visual separator: 20 dashes.
    print "--"*10
    print node.tagName
    
    # Visual separator: 60 equals signs.
    print "==="*20
    # Build a brand-new document whose root element is <some_tag>
    # (no namespace URI, no doctype).
    impl=getDOMImplementation()
    newdoc=impl.createDocument(None , "some_tag", None)
    top_element=newdoc.documentElement
    # Create three text nodes owned by the new document.
    node1=newdoc.createTextNode("node1")
    node2=newdoc.createTextNode("node2")
    node3=newdoc.createTextNode("node3")
    
    # Append them in order: node1, node2, node3.
    top_element.appendChild(node1)
    top_element.appendChild(node2)
    top_element.appendChild(node3)
    
    # Detach node3 and re-insert it before node2, giving node1, node3, node2.
    top_element.removeChild(node3)
    top_element.insertBefore(node3,node2)
    
    # Show the resulting child order.
    print top_element.childNodes
    

      运行结果:

    < link type=" text/css " rel=" stylesheet " href=" http://www.cnblogs.com/css/common.css " />
    < link type=" text/css " rel=" stylesheet " href=" http://www.cnblogs.com/Skins/kubrick/style.css " />
    < link type=" text/css " rel=" stylesheet " href=" http://www.cnblogs.com/css/common2.css " />
    < link type=" text/css " rel=" stylesheet " href=" http://common.cnblogs.com/css/shCore.css " />
    < link type=" text/css " rel=" stylesheet " href=" http://common.cnblogs.com/css/shThemeDefault.css " />
    < link type=" application/rss+xml " rel=" alternate " href=" http://www.cnblogs.com/rollenholt/rss " />
    < link type=" application/rsd+xml " rel=" EditURI " href=" http://www.cnblogs.com/rollenholt/rsd.xml " />
    < link type=" application/wlwmanifest+xml " rel=" wlwmanifest " href=" http://www.cnblogs.com/rollenholt/wlwmanifest.xml " />
    通过另外一种方式获得link标签
    text/css stylesheet http://www.cnblogs.com/css/common.css
    text/css stylesheet http://www.cnblogs.com/Skins/kubrick/style.css
    text/css stylesheet http://www.cnblogs.com/css/common2.css
    text/css stylesheet http://common.cnblogs.com/css/shCore.css
    text/css stylesheet http://common.cnblogs.com/css/shThemeDefault.css
    application/rss+xml alternate http://www.cnblogs.com/rollenholt/rss
    application/rsd+xml EditURI http://www.cnblogs.com/rollenholt/rsd.xml
    application/wlwmanifest+xml wlwmanifest http://www.cnblogs.com/rollenholt/wlwmanifest.xml
    ['ATTRIBUTE_NODE', 'CDATA_SECTION_NODE', 'COMMENT_NODE', 'DOCUMENT_FRAGMENT_NODE', 'DOCUMENT_NODE', 'DOCUMENT_TYPE_NODE', 'ELEMENT_NODE', 'ENTITY_NODE', 'ENTITY_REFERENCE_NODE', 'NOTATION_NODE', 'PROCESSING_INSTRUCTION_NODE', 'TEXT_NODE', '__doc__', '__init__', '__module__', '__nonzero__', '__repr__', '_attrs', '_attrsNS', '_call_user_data_handler', '_child_node_types', '_get_attributes', '_get_childNodes', '_get_firstChild', '_get_lastChild', '_get_localName', '_get_tagName', '_magic_id_nodes', 'appendChild', 'attributes', 'childNodes', 'cloneNode', 'firstChild', 'getAttribute', 'getAttributeNS', 'getAttributeNode', 'getAttributeNodeNS', 'getElementsByTagName', 'getElementsByTagNameNS', 'getInterface', 'getUserData', 'hasAttribute', 'hasAttributeNS', 'hasAttributes', 'hasChildNodes', 'insertBefore', 'isSameNode', 'isSupported', 'lastChild', 'localName', 'namespaceURI', 'nextSibling', 'nodeName', 'nodeType', 'nodeValue', 'normalize', 'ownerDocument', 'parentNode', 'prefix', 'previousSibling', 'removeAttribute', 'removeAttributeNS', 'removeAttributeNode', 'removeAttributeNodeNS', 'removeChild', 'replaceChild', 'schemaType', 'setAttribute', 'setAttributeNS', 'setAttributeNode', 'setAttributeNodeNS', 'setIdAttribute', 'setIdAttributeNS', 'setIdAttributeNode', 'setUserData', 'tagName', 'toprettyxml', 'toxml', 'unlink', 'writexml']
    <DOM Element: head at 0x1b3e968>
    None
    1 None link
    link
    []
    None None
    <xml.dom.minidom.NamedNodeMap object at 0x01B4D648>
    http://www.w3.org/1999/xhtml
    <DOM Text node "u'\n'">
    --------------------
    link
    ============================================================
    [<DOM Text node "'node1'">, <DOM Text node "'node3'">, <DOM Text node "'node2'">]
    

      

  • 相关阅读:
    yum 在线安装LAMP
    python-scrapy-增量式
    python-scrapy-分布式爬取
    python-scrapy-全站数据爬取-CrawlSpider
    python-scrapy-中间件的学习
    python-scrapy深度爬取
    python-scrapy环境配置
    django DOM
    window10设置环境变量(以设置虚拟环境和SQL为例)
    加入园子啦
  • 原文地址:https://www.cnblogs.com/rollenholt/p/2271131.html
Copyright © 2020-2023  润新知