XML数据读取方式性能比较(一)
Posted on 2010-08-12 06:33
小城故事 阅读(3092)
评论(28)
编辑
收藏
几个月来,疑被SOA,一直在和XML操作打交道,SQL差不多又忘光了。现在已经知道,至少有四种常用的XML数据操作方式(好像Java也差不多),不过还没有实际比较过这些方式各有哪些特点或优劣。正好看到网上也没有这方面的实验,偶来总结一下。
测试开始先读取XML源,用一个比较大的RSS文件链接,复制到项目bin/debug目录下。
// Reads the file at `path` fully into a byte array and wraps it in an in-memory stream.
// NOTE(review): all four test methods read from this same stream instance; presumably its
// Position is reset to 0 before each run, otherwise later loads start at end-of-stream — confirm.
Stream xmlStream = new MemoryStream(File.ReadAllBytes(path));
一、XmlDocument 方式
代码
/// <summary>
/// Loads the shared XML stream into an <see cref="XmlDocument"/> and turns every child node
/// of the document element into an anonymous object carrying its feed fields.
/// </summary>
/// <returns>A list holding one anonymous object per child node of the root element.</returns>
static IList testXmlDocument()
{
    XmlDocument dom = new XmlDocument();
    dom.Load(xmlStream);

    XmlNodeList children = dom.DocumentElement.ChildNodes;
    List<object> result = new List<object>(children.Count);

    // NOTE(review): every child of the root is visited (not only "item" elements), and each
    // lookup below dereferences the result of SelectSingleNode directly, so every child is
    // presumably expected to contain all seven sub-elements — confirm against the input file.
    foreach (XmlNode child in children)
    {
        result.Add(new
        {
            Title = child.SelectSingleNode("title").InnerText,
            Link = child.SelectSingleNode("link").InnerText,
            Description = child.SelectSingleNode("description").InnerText,
            Content = child.SelectSingleNode("content").InnerText,
            PubDate = child.SelectSingleNode("pubDate").InnerText,
            Author = child.SelectSingleNode("author").InnerText,
            Category = child.SelectSingleNode("category").InnerText
        });
    }

    return result;
}
二、XPathNavigator 方式
代码
/// <summary>
/// Loads the shared XML stream into an <see cref="XmlDocument"/>, then walks the
/// <c>/channel/item</c> nodes through an <see cref="XPathNavigator"/> and collects
/// the feed fields of each one into an anonymous object.
/// </summary>
/// <returns>A list holding one anonymous object per selected item node.</returns>
static IList testXmlNavigator()
{
    XmlDocument dom = new XmlDocument();
    dom.Load(xmlStream);

    XPathNavigator cursor = dom.CreateNavigator();
    cursor.MoveToRoot();

    XPathNodeIterator items = cursor.Select("/channel/item");
    List<object> result = new List<object>(items.Count);

    // Each lookup dereferences the SelectSingleNode result directly.
    // NOTE(review): items are presumably expected to contain all seven sub-elements — confirm.
    foreach (XPathNavigator item in items)
    {
        result.Add(new
        {
            Title = item.SelectSingleNode("title").Value,
            Link = item.SelectSingleNode("link").Value,
            Description = item.SelectSingleNode("description").Value,
            Content = item.SelectSingleNode("content").Value,
            PubDate = item.SelectSingleNode("pubDate").Value,
            Author = item.SelectSingleNode("author").Value,
            Category = item.SelectSingleNode("category").Value
        });
    }

    return result;
}
三、XmlTextReader 方式
代码
/// <summary>
/// Streams through the shared XML with a forward-only <see cref="XmlReader"/> and builds one
/// <c>Channel</c> per <c>item</c> element, filling its fields from the item's child elements.
/// </summary>
/// <returns>The list of channels, in document order.</returns>
static List<Channel> testXmlReader()
{
    var lstChannel = new List<Channel>();

    // XmlReader is IDisposable, so release it deterministically. The reader is created with
    // default settings, where XmlReaderSettings.CloseInput is false, so disposing it does not
    // close the shared xmlStream.
    using (var reader = XmlReader.Create(xmlStream))
    {
        while (reader.Read())
        {
            if (reader.Name == "item" && reader.NodeType == XmlNodeType.Element)
            {
                var channel = new Channel();
                lstChannel.Add(channel);

                // A self-closing <item/> has no end tag; without this check the inner loop
                // would run on into the next <item> start tag and that item would be lost.
                if (reader.IsEmptyElement)
                    continue;

                while (reader.Read())
                {
                    // Reaching a node named "item" again means the current item has ended.
                    if (reader.Name == "item")
                        break;
                    if (reader.NodeType != XmlNodeType.Element)
                        continue;

                    switch (reader.Name)
                    {
                        case "title":
                            channel.Title = reader.ReadString();
                            break;
                        case "link":
                            channel.Link = reader.ReadString();
                            break;
                        case "description":
                            channel.Description = reader.ReadString();
                            break;
                        case "content":
                            channel.Content = reader.ReadString();
                            break;
                        case "pubDate":
                            channel.PubDate = reader.ReadString();
                            break;
                        case "author":
                            channel.Author = reader.ReadString();
                            break;
                        case "category":
                            channel.Category = reader.ReadString();
                            break;
                        default:
                            break;
                    }
                }
            }
        }
    }

    return lstChannel;
}
四、Linq to XML 方式
/// <summary>
/// Loads the shared XML stream into an <see cref="XDocument"/> and projects every
/// <c>item</c> descendant of the root <c>channel</c> element into an anonymous object.
/// </summary>
/// <returns>A materialized list holding one anonymous object per item element.</returns>
static IList testXmlLinq()
{
    XDocument document = XDocument.Load(xmlStream);

    // Method-syntax form of the query; ToList() forces evaluation before returning.
    return document
        .Elements("channel")
        .Descendants("item")
        .Select(item => new
        {
            Title = item.Element("title").Value,
            Link = item.Element("link").Value,
            Description = item.Element("description").Value,
            Content = item.Element("content").Value,
            PubDate = item.Element("pubDate").Value,
            Author = item.Element("author").Value,
            Category = item.Element("category").Value
        })
        .ToList();
}
测试结果:
XmlDocument | 47ms |
XPathNavigator | 42ms |
XmlTextReader | 23ms |
Xml Linq | 28ms |
小结一下自己的认识,XmlDocument的操作基本按W3C的DOM操作方式,不过要将全部节点解析成对象加载到内存中,往往造成很大浪费。所以微软自己的编程规范也不推荐用它。这里由于读取了所有节点,可能因此性能和Navigator方式相差不大。在三种随机读取方式中,Xml Linq性能最高,只是方法名有点别扭。XmlTextReader方式类似所谓的SAX(严格说它是"拉"模型,而SAX是"推"模型),只进只读,无疑性能最高,不过实现上麻烦了不少,要比较精确的控制访问逻辑,也无法用匿名类存储数据。
.Net 3.5发布的Xml Linq可以很好地取代前两种方式,通常情况下,最好用它。只有个别场合,如果对性能要求极高,或者读取的Xml数据量太大不能一下子下载或读取到内存中,那就只好痛苦委身于XmlTextReader了。