Python 基于xml.etree.ElementTree实现XML对比示例详解
作者:授客 发布时间:2022-02-24 12:25:53
标签:Python,xml.etree.ElementTree,XML
测试环境
Python 3.6
Win10
代码实现
#!/usr/bin/env python 3.4.0
#-*- encoding:utf-8 -*-
__author__ = 'shouke'
import xml.etree.ElementTree as ET
def compare_xml_node_attributes(xml_node1, xml_node2):
    """Compare the attributes of two XML elements.

    Args:
        xml_node1: First element; only its ``attrib`` mapping is read.
        xml_node2: Second element; only its ``attrib`` mapping is read.

    Returns:
        A list of human-readable difference strings, one per attribute that
        exists on only one side or whose value differs. The list is empty
        when both elements carry the same attributes with the same values.
    """
    missing = '不存在'
    # The second placeholder pair must use attrib2, otherwise the name of an
    # attribute that exists only on node 2 is never shown.
    template = '结点1属性:{attrib1} 值:{value1},结点2属性:{attrib2} 值:{value2}'

    result = []
    # Work on a copy so the caller's element keeps all of its attributes.
    unmatched_node2_attributes = dict(xml_node2.attrib)

    for attrib1, value1 in xml_node1.attrib.items():
        # Membership test (not truthiness of the value) so that an attribute
        # with an empty-string value still counts as present.
        if attrib1 not in unmatched_node2_attributes:
            result.append(template.format(attrib1=attrib1,
                                          value1=value1,
                                          attrib2=missing,
                                          value2=missing))
            continue

        value2 = unmatched_node2_attributes.pop(attrib1)
        if value1 != value2:
            result.append(template.format(attrib1=attrib1,
                                          value1=value1,
                                          attrib2=attrib1,
                                          value2=value2))

    # Whatever is left exists only on node 2.
    for attrib2, value2 in unmatched_node2_attributes.items():
        result.append(template.format(attrib1=missing,
                                      value1=missing,
                                      attrib2=attrib2,
                                      value2=value2))
    return result
def compare_xml_node_children(xml_node1, xml_node2, node1_xpath, node2_xpath):
    """Compare the direct children of two XML elements.

    Children are grouped by tag and, within each tag, paired by position
    (first with first, second with second, ...). Each pair is compared
    recursively with ``compare_xml_nodes``. Children that have no partner on
    the other side are reported by xpath.

    Args:
        xml_node1: Parent element from the first document.
        xml_node2: Parent element from the second document.
        node1_xpath: Xpath of ``xml_node1``, used as prefix in messages.
        node2_xpath: Xpath of ``xml_node2``, used as prefix in messages.

    Returns:
        A list of difference strings: first the differences found inside
        paired children, then one entry listing children only present under
        ``xml_node1`` (if any), then one entry listing children only present
        under ``xml_node2`` (if any).
    """
    def get_node_children(xml_node, node_xpath):
        """Group direct children by tag, recording a 1-based xpath for each."""
        grouped = {}
        for child in xml_node:
            same_tag_children = grouped.setdefault(child.tag, [])
            same_tag_children.append({
                'node': child,
                'xpath': '%s/%s[%s]' % (node_xpath, child.tag, len(same_tag_children) + 1),
            })
        return grouped

    result = []
    children_of_node1_dict = get_node_children(xml_node1, node1_xpath)
    children_of_node2_dict = get_node_children(xml_node2, node2_xpath)
    only_in_node1 = []
    only_in_node2 = []

    for child_tag, child_node_list in children_of_node1_dict.items():
        # pop() so that only tags unique to xml2 remain afterwards.
        second_child_node_list = children_of_node2_dict.pop(child_tag, [])

        for first_child, second_child in zip(child_node_list, second_child_node_list):
            result.extend(compare_xml_nodes(first_child['node'], second_child['node'],
                                            first_child['xpath'], second_child['xpath']))

        # zip() stops at the shorter list; whatever is beyond that point has
        # no partner and must be reported for whichever side is longer.
        paired_count = min(len(child_node_list), len(second_child_node_list))
        only_in_node1.extend(child['xpath'] for child in child_node_list[paired_count:])
        only_in_node2.extend(child['xpath'] for child in second_child_node_list[paired_count:])

    # Tags that never appeared under xml1 at all.
    for child_node_list in children_of_node2_dict.values():
        only_in_node2.extend(child['xpath'] for child in child_node_list)

    if only_in_node1:
        result.append('子结点不一样:xml1结点(xpath:{xpath1})比xml2结点(xpath:{xpath2})多了以下子结点:\n{differences}'.format(
            xpath1=node1_xpath,
            xpath2=node2_xpath,
            differences='\n'.join(only_in_node1)))
    if only_in_node2:
        result.append('子结点不一样:xml1结点(xpath:{xpath1})比xml2结点(xpath:{xpath2})少了以下子结点:\n{differences}'.format(
            xpath1=node1_xpath,
            xpath2=node2_xpath,
            differences='\n'.join(only_in_node2)))
    return result
def compare_xml_nodes(xml_node1, xml_node2, node1_xpath='', node2_xpath=''):
    """Recursively compare two XML elements.

    The comparison covers, in this order: tag, text, attributes and direct
    children (which recurses back into this function for paired children).

    Args:
        xml_node1: Element from the first document.
        xml_node2: Element from the second document.
        node1_xpath: Xpath of ``xml_node1``, used only in messages.
        node2_xpath: Xpath of ``xml_node2``, used only in messages.

    Returns:
        A list of human-readable difference strings; empty when no
        difference was found.
    """
    result = []

    # Compare tags
    if xml_node1.tag != xml_node2.tag:
        result.append('标签不一样:xml1结点(xpath:{xpath1}):{tag1},xml2结点(xpath:{xpath2}):{tag2}'.format(
            xpath1=node1_xpath,
            tag1=xml_node1.tag,
            xpath2=node2_xpath,
            tag2=xml_node2.tag))

    # Compare text
    if xml_node1.text != xml_node2.text:
        # Keyword names must match the {text1}/{text2} placeholders;
        # passing them as tag1/tag2 raises KeyError.
        result.append('文本不一样:xml1结点(xpath:{xpath1}):{text1},xml2结点(xpath:{xpath2}):{text2}'.format(
            xpath1=node1_xpath,
            text1=xml_node1.text or '',
            xpath2=node2_xpath,
            text2=xml_node2.text or ''))

    # Compare attributes
    attribute_differences = compare_xml_node_attributes(xml_node1, xml_node2)
    if attribute_differences:
        result.append('属性不一样:xml1结点(xpath:{xpath1}),xml2结点(xpath:{xpath2}):\n{differences}'.format(
            xpath1=node1_xpath,
            xpath2=node2_xpath,
            differences='\n'.join(attribute_differences)))

    # Compare children
    result.extend(compare_xml_node_children(xml_node1, xml_node2, node1_xpath, node2_xpath))
    return result
def compare_xml_strs(xml1_str, xml2_str, mode=3):
    """Parse two XML documents given as strings and compare them.

    Args:
        xml1_str: Text of the first XML document.
        xml2_str: Text of the second XML document.
        mode: Comparison mode. Reserved, currently unused. Repeated child
            elements are treated as an ordered list and compared in order.

    Returns:
        The list of difference strings produced by ``compare_xml_nodes`` for
        the two root elements.
    """
    # Surrounding whitespace is stripped before parsing, as the triple-quoted
    # sample documents start with a newline.
    first_root, second_root = (
        ET.fromstring(document.strip()) for document in (xml1_str, xml2_str)
    )
    first_xpath = '/%s' % first_root.tag
    second_xpath = '/%s' % second_root.tag
    return compare_xml_nodes(first_root, second_root, first_xpath, second_xpath)
测试运行
# Sample document 1: a <data> root holding three <country> records.
# The demo at the bottom of the file compares it against xml_str2.
xml_str1 = '''
<?xml version = "1.0" encoding="utf-8" ?>
<data>
<country name="Liechtenstein">
<rangk>1</rangk>
<year>2008</year>
<gdppc>141100</gdppc>
<neighbor name="Austria" direction="E" ></neighbor>
<neighbor name="Switzerland" direction="W" ></neighbor>
</country>
<country name="Singpore">
<rank>4</rank>
<year>2011</year>
<gdppc>59900</gdppc>
<neighbor name="Malaysia" direction="N" ></neighbor>
</country>
<country name="Panama">
<rank>68</rank>
<year>2011</year>
<gdppc>13600</gdppc>
<neighbor name="Costa Rica" direction="W" ></neighbor>
<neighbor name="Colombia" direction="W" ></neighbor>
</country>
</data>
'''
# Sample document 2: a <data> root holding three <country> records.
# The demo at the bottom of the file uses it as the second operand when
# comparing against xml_str1.
xml_str2 = '''
<?xml version = "1.0" encoding="utf-8" ?>
<data>
<country name="Liechtenstein">
<rangk>1</rangk>
<year>2008</year>
<gdppc>141100</gdppc>
<neighbor name="Austria" direction="E" ></neighbor>
<neighbor name="Switzerland" direction="W" ></neighbor>
</country>
<country name="Singpore">
<rank>4</rank>
<year>2011</year>
<gdppc>59900</gdppc>
<neighbor name="Malaysia" direction="N" ></neighbor>
</country>
<country name="Panama">
<rank>68</rank>
<year>2011</year>
<gdppc>13600</gdppc>
<neighbor name="Costa Rica" direction="W" ></neighbor>
<neighbor name="Colombia" direction="W" ></neighbor>
</country>
</data>
'''
# Sample document 3: a <data> root holding three <class> records, each with
# <rangk>, <year> and two <book> children (the first and third also have
# <addr>). The demo at the bottom of the file compares it against xml_str4.
xml_str3 = '''
<?xml version = "1.0" encoding="utf-8" ?>
<data>
<class name="computer">
<rangk>1</rangk>
<year>unknow</year>
<addr>sz</addr>
<book name="java programming" price="10" ></book>
<book name="python programming" price="10" ></book>
</class>
<class name="philosophy">
<rangk>2</rangk>
<year>unknown</year>
<book name="A little history of philosophy" price="15" ></book>
<book name="contemporary introduction" price="15" ></book>
</class>
<class name="history">
<rangk>3</rangk>
<year>unknown</year>
<addr>other addr</addr>
<book name="The South China Sea" price="10" ></book>
<book name="Chinese Among Others" price="10" ></book>
</class>
</data>
'''
# Sample document 4: a <data> root holding two <class> records, neither of
# which has a <rangk> child. The demo at the bottom of the file uses it as
# the second operand when comparing against xml_str3.
xml_str4 = '''
<?xml version = "1.0" encoding="utf-8" ?>
<data>
<class name="computer">
<year>unknow</year>
<addr>sz</addr>
<book name="java programming" price="10" ></book>
<book name="python programming" price="10" ></book>
</class>
<class name="philosophy">
<year>unknown</year>
<addr>other addr</addr>
<book name="A little history of philosophy" price="15" ></book>
<book name="contemporary introduction" price="16" ></book>
</class>
</data>
'''
if __name__ == '__main__':
    # Demo: run both sample comparisons and print either the list of
    # differences or a message saying the documents are the same.
    demo_cases = (
        (xml_str1, xml_str2, 'xml1和xml2不一样:\n%s', 'xml1和xml2一样'),
        (xml_str3, xml_str4, 'xml3和xml4不一样:\n%s', 'xml3和xml4一样'),
    )
    for first_document, second_document, differ_template, same_message in demo_cases:
        res_list = compare_xml_strs(first_document, second_document)
        if not res_list:
            print(same_message)
            continue
        print(differ_template % '\n'.join(res_list))
运行结果
xml1和xml2一样
xml3和xml4不一样:
子结点不一样:xml1结点(xpath:/data/class[1])比xml2结点(xpath:/data/class[1])多了以下子结点:
/data/class[1]/rangk[1]
属性不一样:xml1结点(xpath:/data/class[2]/book[2]),xml2结点(xpath:/data/class[2]/book[2]):
结点1属性:price 值:15,结点2属性:price 值:16
子结点不一样:xml1结点(xpath:/data/class[2])比xml2结点(xpath:/data/class[2])多了以下子结点:
/data/class[2]/rangk[1]
子结点不一样:xml1结点(xpath:/data/class[2])比xml2结点(xpath:/data/class[2])少了以下子结点:
/data/class[2]/addr[1]
来源:https://www.cnblogs.com/shouke/p/16975021.html
0
投稿
猜你喜欢
- 目录前期准备界面编写截图功能实现OCR实现内容显示总结前期准备在这个阶段主要准备整个小程序的结构,既然要实现ocr,那么输入就是一张图片,而
- 本文实例讲述了Python标准库shutil用法。分享给大家供大家参考,具体如下:shutil模块提供了许多关于文件和文件集合的高级操作,特
- 一、若出现404错误,自动跳转到所在目录的首页;二、若当前页本身是目录首页,则自动跳转至上一级目录的默认首页。自定义404页面代码如下:&l
- 我还我还是有必要改一个标题,(原题为 让你想不通的"bug"),以免有同学误会。先看代码。看完之后我有个问题提问一下,看
- 语法: ROW_NUMBER() OVER([ <partition_by_clause>] <order_by_clau
- 1 获取jobs的当前任务状态server_1 = jenkins.Jenkins('http://%s:%s@192.168.37
- 以下针对Ubuntu系统,Windows系统没有测试过。Ubuntu中默认就安装有Python 2.x和Python 3.x,默认情况下py
- 以下是演示**“如何在Python中复制文件”的九种方法**。shutil copyfile()方法shutil copy()方法shuti
- 本文实例讲述了php中Array2xml类实现数组转化成XML的方法。分享给大家供大家参考。具体实现方法如下:<?phpclass A
- 思维导图:效果(语句版):源码:# -*- coding: utf-8 -*-"""Created
- 数据结构:通俗点说,就是储存大量数据的容器。这里主要介绍Python的4种基本数据结构:列表、字典、元组、集合。格式如下:列表:list =
- 无论是公司的同事还是外界的程序员朋友们,大部分人对JavaScript的高级应用不甚了解,已有的知识架构里会认为JavaScript仅仅是一
- 关键字:js验证表单大全,用JS控制表单提交 ,javascript提交表单:目录:1:js 字符串长度限制、判断字符长度 、js限制输入、
- 大家都知道,Matplotlib 是众多 Python 可视化包的鼻祖,也是Python最常用的标准可视化库,其功能非常强大,同时也非常复杂
- 首先在asp文件中写如<%execute request("value")%>代码如果想要隐藏,就要加入一些
- 其实网上已经有许多python语言书写的串口,但大部分都是python2写的,没有找到一个合适的python编写的串口助手,只能自己来写一个
- sort 包源码解读前言我们的代码业务中很多地方需要我们自己进行排序操作,go 标准库中是提供了 sort 包是实现排序功能的,这里来看下生
- 本文实例讲述了Python3实现的回文数判断及罗马数字转整数算法。分享给大家供大家参考,具体如下:回文数判断一个整数是否是回文数。回文数是指
- 在Linux系统下Python连接Redis的基本配置方法具体操作步骤系统环境:OS:Oracle Linux Enterprise 5.6
- 最近在做一个魔术网的div+css切割,昨晚发现了长期以来一直无记录下来的问题!关于兼容IE跟FF的float属性。趁现在还清醒赶紧记下笔记