网络编程
位置:首页>> 网络编程>> Python编程>> python解析xml文件操作实例

python解析xml文件操作实例

作者:shichen2014  发布时间:2022-01-02 10:39:13 

标签:python,xml

本文实例讲述了python解析xml文件操作的实现方法。分享给大家供大家参考。具体方法如下:

xml文件内容如下:


<?xml version="1.0" ?>
<!--Simple xml document__chapter 8-->
<book>
 <title>
   sample xml thing
 </title>
 <author>
   <name>
     <first>
       ma
     </first>
     <last>
       xiaoju
     </last>
   </name>
   <affiliation>
     Springs Widgets, Inc.
   </affiliation>
 </author>
 <chapter number="1">
   <title>
     First
   </title>
   <para>
     I think widgets are great. You should buy lots of them from
     <company>
       Springy Widgets, Inc.
     </company>
   </para>
 </chapter>
</book>

python代码:


from xml.dom import minidom, Node
import re, textwrap

class SampleScanner:
    """Walk a parsed ``<book>`` document and print a summary of its contents.

    Constructing an instance scans the document immediately: the book title,
    the author's name and affiliation, and for each chapter its number, title
    and the company mentioned in its paragraphs are written to stdout.

    All output goes through single-argument ``print(...)`` calls with
    %-formatting so that the exact same text is produced on Python 2 and 3.
    """

    def __init__(self, doc):
        """Scan every top-level ``<book>`` element of *doc*.

        Args:
            doc: A ``minidom.Document``, e.g. from ``minidom.parse``.

        Raises:
            AssertionError: If *doc* is not a ``minidom.Document``.
        """
        assert isinstance(doc, minidom.Document)
        for child in doc.childNodes:
            if child.nodeType == Node.ELEMENT_NODE and child.tagName == "book":
                self.handle_book(child)

    @staticmethod
    def _elements(node):
        """Yield only the direct children of *node* that are elements."""
        for child in node.childNodes:
            if child.nodeType == Node.ELEMENT_NODE:
                yield child

    def handle_book(self, node):
        """Print the book title and dispatch its author/chapter children."""
        for child in self._elements(node):
            if child.tagName == "title":
                print("Book titile is: %s" % self.gettext(child.childNodes))
            elif child.tagName == "author":
                self.handle_author(child)
            elif child.tagName == "chapter":
                self.handle_chapter(child)

    def handle_chapter(self, node):
        """Print a chapter's number and title, then process its paragraphs."""
        print("number: %s" % node.getAttribute("number"))
        # getElementsByTagName searches all descendants, so this gathers the
        # text of every <title> found anywhere beneath the chapter.
        print("title: %s" % self.gettext(node.getElementsByTagName("title")))
        for child in self._elements(node):
            if child.tagName == "para":
                self.handle_chapter_para(child)

    def handle_chapter_para(self, node):
        """Print the text of the ``<company>`` elements inside a paragraph."""
        company = self.gettext(node.getElementsByTagName("company"))
        print("chapter:para:company %s" % company)

    def handle_author(self, node):
        """Print the author's name and affiliation."""
        for child in self._elements(node):
            if child.tagName == "name":
                self.handle_author_name(child)
            elif child.tagName == "affiliation":
                print("affiliation: %s" % self.gettext(child.childNodes))

    def handle_author_name(self, node):
        """Print the ``<first>`` and ``<last>`` parts of an author name.

        A part that is absent from *node* is reported as an empty string.
        """
        first = ""
        last = ""
        for child in self._elements(node):
            if child.tagName == "first":
                first = self.gettext(child.childNodes)
            elif child.tagName == "last":
                last = self.gettext(child.childNodes)
        print("firstname:%s,lastname:%s" % (first, last))

    def gettext(self, nodelist):
        """Return the concatenated text beneath *nodelist*.

        Args:
            nodelist: An iterable of DOM nodes (a ``NodeList`` or plain list).

        Returns:
            The text of all text and CDATA nodes, in document order, with
            every run of whitespace collapsed to a single space. Leading and
            trailing whitespace is collapsed but not stripped.
        """
        parts = []
        for node in nodelist:
            if node.nodeType in (Node.TEXT_NODE, Node.CDATA_SECTION_NODE):
                # Use .data rather than .wholeText: wholeText also includes
                # adjacent text/CDATA siblings, which this loop visits on its
                # own and would therefore count more than once.
                parts.append(node.data)
            elif node.hasChildNodes():
                parts.append(self.gettext(node.childNodes))
        return re.sub(r"\s+", " ", "".join(parts))

if __name__ == "__main__":
    # Script entry point: parse the sample file from the current directory
    # and pass the resulting document to the scanner's constructor.
    doc = minidom.parse("simple.xml")
    sample = SampleScanner(doc)

希望本文所述对大家的Python程序设计有所帮助。

0
投稿

猜你喜欢

手机版 网络编程 asp之家 www.aspxhome.com