当前位置 博文首页 > python解析xml文件操作实例

    python解析xml文件操作实例

    作者:admin 时间:2021-07-14 18:40

    本文实例讲述了python解析xml文件操作的实现方法。分享给大家供大家参考。具体方法如下:

    xml文件内容如下:

    <?xml version="1.0" ?> 
    <!--Simple xml document__chapter 8--> 
    <book> 
      <title> 
        sample xml thing 
      </title> 
      <author> 
        <name> 
          <first> 
            ma 
          </first> 
          <last> 
            xiaoju 
          </last> 
        </name> 
        <affiliation> 
          Springs Widgets, Inc. 
        </affiliation> 
      </author> 
      <chapter number="1"> 
        <title> 
          First 
        </title> 
        <para> 
          I think widgets are great. You should buy lots of them from 
          <company> 
            Springy Widgets, Inc 
          </company> 
        </para> 
      </chapter> 
    </book> 
    
    

    python代码:

    from xml.dom import minidom, Node 
    import re, textwrap 
     
    class SampleScanner:
        """Walk a parsed ``<book>`` document and print selected fields.

        Constructing an instance performs the whole scan: every top-level
        ``<book>`` element is visited and its title, author name, author
        affiliation, chapter numbers, chapter titles and the ``<company>``
        text inside chapter paragraphs are printed to standard output.

        Every ``print`` call is given one pre-formatted string, so the code
        runs and emits the same text under both Python 2 and Python 3.
        """

        def __init__(self, doc):
            """Scan *doc*, which must be a ``minidom.Document``.

            Raises AssertionError when *doc* is not a ``minidom.Document``.
            """
            assert isinstance(doc, minidom.Document)
            for child in doc.childNodes:
                if child.nodeType == Node.ELEMENT_NODE and child.tagName == "book":
                    self.handle_book(child)

        @staticmethod
        def _child_elements(node):
            """Return the direct children of *node* that are element nodes."""
            return [child for child in node.childNodes
                    if child.nodeType == Node.ELEMENT_NODE]

        def handle_book(self, node):
            """Print the book title and dispatch its author/chapter children."""
            for child in self._child_elements(node):
                if child.tagName == "title":
                    print("Book titile is: %s" % self.gettext(child.childNodes))
                elif child.tagName == "author":
                    self.handle_author(child)
                elif child.tagName == "chapter":
                    self.handle_chapter(child)

        def handle_chapter(self, node):
            """Print a chapter's number and title, then process its paragraphs."""
            print("number: %s" % node.getAttribute("number"))
            # getElementsByTagName searches all descendants, not only direct
            # children, so any nested <title> contributes to the printed text.
            print("title: %s" % self.gettext(node.getElementsByTagName("title")))

            for child in self._child_elements(node):
                if child.tagName == "para":
                    self.handle_chapter_para(child)

        def handle_chapter_para(self, node):
            """Print the text of the ``<company>`` elements inside a paragraph."""
            company = self.gettext(node.getElementsByTagName("company"))
            print("chapter:para:company %s" % company)

        def handle_author(self, node):
            """Print the author's name and affiliation."""
            for child in self._child_elements(node):
                if child.tagName == "name":
                    self.handle_author_name(child)
                elif child.tagName == "affiliation":
                    print("affiliation: %s" % self.gettext(child.childNodes))

        def handle_author_name(self, node):
            """Print the ``<first>`` and ``<last>`` parts of an author name.

            A part that is absent is printed as an empty string.
            """
            first = ""
            last = ""
            for child in self._child_elements(node):
                if child.tagName == "first":
                    first = self.gettext(child.childNodes)
                elif child.tagName == "last":
                    last = self.gettext(child.childNodes)

            print("firstname:%s,lastname:%s" % (first, last))

        def gettext(self, nodelist):
            """Return the text found in *nodelist* with whitespace collapsed.

            Text nodes contribute their ``wholeText``; any other node that has
            children is searched recursively. Each run of whitespace becomes a
            single space. Leading and trailing whitespace is collapsed but not
            stripped, so the result may start or end with one space.
            """
            retlist = []
            for node in nodelist:
                if node.nodeType == Node.TEXT_NODE:
                    retlist.append(node.wholeText)
                elif node.hasChildNodes():
                    # hasChildNodes is a method and must be called; the bare
                    # attribute is always truthy.
                    retlist.append(self.gettext(node.childNodes))

            return re.sub(r'\s+', " ", ''.join(retlist))
       
             
    if __name__ == "__main__":
        # Parse simple.xml from the working directory; building the scanner
        # walks the document and prints what it finds.
        sample = SampleScanner(minidom.parse("simple.xml"))
    
    

    希望本文所述对大家的Python程序设计有所帮助。

    jsjbwy
    下一篇:没有了