?
================== 闲扯的话================
对于现在越来越轻量级,越来越讲究速度和接近用户的应用来说,xml 确实有点复杂了。解析起来不仅耗内存,而且很复杂。这就好像花了几千块钱买了个MS Office ,但是80% 的feature 都用不着,还白白的耗着CPU 和内存。
个人觉得,设置文件用XML 其实挺好,因为设置文件一般并不太大,而且要求可读性强,还有很多乱七八糟的需求,可以利用XML 的力量。昨天搞chrome 的设置,发现chrome 的设置文件也是使用的json ,读起来也是轻松愉快。
前阵子做了个程序,需要解析豆瓣API 调用返回的XML 。真想说一句。。。豆瓣你别用XML 了。。。至少,提供个json 版的API 调用吧。
(以上谨代表个人观点)
=================== 正文=================
解析豆瓣返回的 XML,实在是不想用 DOM 这个重量级的玩意。DOM 这个玩意,说它强大好还是说它官僚好呢。我倾向于使用 SAX 解析。但是现在面临的一个问题是,我需要根据 XML 节点的名字和属性值(一个或者多个)来决定当前的值是不是我想要的。这就麻烦一点点。第一反应是考虑 XPath。后来觉得不如自己做一个得了,权当是按需定制一个轻量级的 XPath。
首先定义XMLSearchUnit 类,这个类的实例用来描述一个需要在XML 中搜索的值,值可以是xml 节点的值,或者是节点的属性。
package
 com.deepnighttwo.resourceresolver.douban.resolver.utils;
import
 java.util.HashMap;
import
 java.util.Map;
import
 org.xml.sax.Attributes;
/**
 * Describes one value to look for in an XML document: either the text of a
 * node or the value of one of its attributes.
 *
 * <p>A unit is identified by an element path of the form
 * {@code /node_name/node_name/...}, an optional set of attribute constraints
 * that the element must satisfy, and an optional attribute name. When the
 * attribute name is {@code null} the target is the node's text; otherwise the
 * target is the value of that attribute.
 *
 * <p>{@link #hashCode()} and {@link #equals(Object)} are computed from mutable
 * fields, so an instance should not be modified while it is used as a key in a
 * hash-based collection.
 *
 * @author mzang
 */
public class XMLSearchUnit {

    // Attribute name -> required value. Every entry must match for the unit to match.
    private Map<String, String> attributeMatchValidation = new HashMap<String, String>();

    // Name of the attribute whose value is the target; null means the node text is the target.
    private String expectedAttr;

    // Element path, format is: /node_name/node_name/...
    private String xmlPath;

    /**
     * Creates a search unit for the given element path.
     *
     * @param xmlPath element path in the form {@code /node_name/node_name/...}
     */
    public XMLSearchUnit(String xmlPath) {
        this.xmlPath = xmlPath;
    }

    /**
     * Tells whether an element satisfies this search unit, i.e. its path is
     * equal to this unit's path and every attribute constraint is met.
     * Attribute values are compared case-insensitively.
     *
     * <p>The check is null-tolerant: a unit without a path matches nothing, a
     * {@code null} constraint map means "no constraints", a {@code null}
     * {@code attributes} argument is treated as an element without attributes,
     * and a constraint whose required value is {@code null} is satisfied only
     * when the attribute is absent.
     *
     * @param path       path of the element being examined
     * @param attributes attributes of the element being examined, may be {@code null}
     * @return {@code true} if the path and all attribute constraints match
     */
    public boolean match(String path, Attributes attributes) {
        if (xmlPath == null || !xmlPath.equals(path)) {
            return false;
        }
        if (attributeMatchValidation == null) {
            return true;
        }
        for (Map.Entry<String, String> required : attributeMatchValidation.entrySet()) {
            String expected = required.getValue();
            String actual = (attributes == null) ? null : attributes.getValue(required.getKey());
            if (expected == null) {
                if (actual != null) {
                    return false;
                }
            } else if (!expected.equalsIgnoreCase(actual)) {
                // equalsIgnoreCase(null) is false, so a missing attribute fails the constraint.
                return false;
            }
        }
        return true;
    }

    /**
     * Returns the live map of attribute constraints (attribute name to
     * required value); changes to the returned map affect this unit.
     *
     * @return the attribute constraint map
     */
    public Map<String, String> getAttributeMatchValidation() {
        return attributeMatchValidation;
    }

    /**
     * Adds (or replaces) one attribute constraint.
     *
     * @param key   attribute name
     * @param value value the attribute must have, compared case-insensitively
     */
    public void addAttributeValidation(String key, String value) {
        if (attributeMatchValidation == null) {
            // The map may have been cleared through the setter; recreate it on demand.
            attributeMatchValidation = new HashMap<String, String>();
        }
        attributeMatchValidation.put(key, value);
    }

    /**
     * @return the element path this unit looks for
     */
    public String getXmlPath() {
        return xmlPath;
    }

    /**
     * Replaces the whole set of attribute constraints. The given map is
     * stored as-is, not copied.
     *
     * @param attributeMatchValidation attribute name to required value
     */
    public void setAttributeMatchValidation(Map<String, String> attributeMatchValidation) {
        this.attributeMatchValidation = attributeMatchValidation;
    }

    /**
     * @return the name of the targeted attribute, or {@code null} when the
     *         node text is the target
     */
    public String getExpectedAttr() {
        return expectedAttr;
    }

    /**
     * Selects what this unit extracts. If the target is the node value, set
     * this to {@code null}; if the target is an attribute value, set it to
     * the attribute name.
     *
     * @param expectedAttr attribute name, or {@code null} for the node value
     */
    public void setExpectedAttr(String expectedAttr) {
        this.expectedAttr = expectedAttr;
    }

    /**
     * Hash code derived from the attribute constraints, the expected
     * attribute and the path. It changes whenever one of them changes.
     */
    @Override
    public int hashCode() {
        final int prime = 31;
        int result = 1;
        result = prime * result
                + ((attributeMatchValidation == null) ? 0 : attributeMatchValidation.hashCode());
        result = prime * result + ((expectedAttr == null) ? 0 : expectedAttr.hashCode());
        result = prime * result + ((xmlPath == null) ? 0 : xmlPath.hashCode());
        return result;
    }

    /**
     * Two units are equal when they are of the same class and have equal
     * attribute constraints, expected attribute and path.
     */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (obj == null || getClass() != obj.getClass()) {
            return false;
        }
        XMLSearchUnit other = (XMLSearchUnit) obj;
        return nullSafeEquals(attributeMatchValidation, other.attributeMatchValidation)
                && nullSafeEquals(expectedAttr, other.expectedAttr)
                && nullSafeEquals(xmlPath, other.xmlPath);
    }

    // Equality that treats two nulls as equal and a single null as unequal.
    private static boolean nullSafeEquals(Object left, Object right) {
        return (left == null) ? (right == null) : left.equals(right);
    }
}
这个类比较简单。就是用一个 HashMap 保存待匹配的 attribute 键值对,用一个字符串表示期待的 attribute name,用一个字符串表示期待的 node path。
然后就是如何在 SAX 里用到这个类的实例去搜索了。
package
 com.deepnighttwo.resourceresolver.douban.resolver.utils;
import
 java.io.InputStream;
import
 java.util.ArrayList;
import
 java.util.HashMap;
import
 java.util.List;
import
 java.util.Map;
import
 javax.xml.parsers.SAXParser;
import
 javax.xml.parsers.SAXParserFactory;
import
 org.xml.sax.Attributes;
import
 org.xml.sax.InputSource;
import
 org.xml.sax.SAXException;
import
 org.xml.sax.XMLReader;
import
 org.xml.sax.helpers.DefaultHandler;
/**
?
* 
?
* SAXP parser working with
XMLSearchUnit.
?
* 
?
* 
@author
 mzang
?
*/
public
 
class
 DoubanSearchParser 
extends
DefaultHandler {
??? 
// create and initial search units
??? 
public
 
static
 
final
 XMLSearchUnit DETAILS_LINK_API_PATH = 
new
 XMLSearchUnit(
??????????? 
"/feed/entry/id");
??? 
public
 
static
 
final
 XMLSearchUnit DETAILS_CONTENT_PATH = 
new
 XMLSearchUnit(
??????????? 
"/entry/summary");
??? 
public
 
static
 
final
 XMLSearchUnit DETAILS_TITLE_PATH = 
new
 XMLSearchUnit(
??????????? 
"/entry/title");
??? 
public
 
static
 
final
 XMLSearchUnit DETAILS_CHINESE_NAME_PATH = 
new
 XMLSearchUnit(
???????????
"/entry/db:attribute");
??? 
public
 
static
 
final
 XMLSearchUnit DETAILS_RATINGE_PATH = 
new
 XMLSearchUnit(
???????????
"/entry/gd:rating");
??? 
public
 
static
 
final
 XMLSearchUnit
DETAILS_RATINGE_RATER_COUNT_PATH = 
new
 XMLSearchUnit(
?
?????????? 
"/entry/gd:rating");
??? 
public
 
static
 
final
 XMLSearchUnit DETAILS_LINK_URL_PATH = 
new
 XMLSearchUnit(
???????????
"/feed/entry/link");
??? 
static
 {
???????
DETAILS_LINK_URL_PATH.addAttributeValidation("rel",
"alternate");
??????? 
DETAILS_LINK_URL_PATH.setExpectedAttr("href");
???????
DETAILS_CHINESE_NAME_PATH.addAttributeValidation("lang",
"zh_CN");
???????
DETAILS_CHINESE_NAME_PATH.addAttributeValidation("name",
"aka");
???????
DETAILS_RATINGE_PATH.setExpectedAttr("average");
?
??????
DETAILS_RATINGE_RATER_COUNT_PATH.setExpectedAttr("numRaters");
??? 
}
??? 
// a map to store the XMLSearchUnit and value
??? 
private
 Map<XMLSearchUnit, String> results = 
new
 HashMap<XMLSearchUnit, String>();
??? 
// a counter of search unit. if it is 0, then all search
unit finds a match
??? 
// value and the result of the XML
will be skipped.
??? 
private
 
int
 count = 0;
??? 
private
StringBuilder path = 
new
 StringBuilder();
??? 
private
 
static
 
final
 String pathSeparater = "/";
??? 
private
XMLSearchUnit[] searchUnits;
??? 
List<XMLSearchUnit> foundItems
= 
new
 ArrayList<XMLSearchUnit>();
??? 
/**
???? 
* constructor, accept XML input
stream, 0 or more search unit instances.
???? 
* 
???? 
* 
@param
 input
???? 
* 
@param
expectedPath
???? 
* 
@return
???? 
*/
?? 
?
public
 Map<XMLSearchUnit, String>
parseResults(InputStream input,
??????????? 
XMLSearchUnit...
expectedPath) {
??????? 
for
(XMLSearchUnit search : expectedPath) {
??????????? 
results.put(search, 
null
);
??????? 
}
??????? 
searchUnits = expectedPath;
???? 
???
count = expectedPath.length;
??????? 
XMLReader xmlReader = 
null
;
??????? 
try
 {
??????????? 
SAXParserFactory spfactory =
SAXParserFactory.newInstance();
??????????? 
spfactory.setValidating(
false
);
??????????? 
SAXParser saxParser =
spfactory.newSAXParser();
??????????? 
xmlReader =
saxParser.getXMLReader();
??????????? 
xmlReader.setContentHandler(
this
);
??????????? 
xmlReader.parse(
new
 InputSource(input));
??????? 
} 
catch
(Exception e) {
??????????? 
System.err.println(e);
??????????? 
System.exit(1);
?????? 
?
}
??????? 
return
results;
??? 
}
??? 
private
 
void
 addToPath(String addPath) {
???????
path.append(pathSeparater).append(addPath.toLowerCase());
??? 
}
??? 
private
 
void
 popPath() {
??????? 
int
 index
= path.lastIndexOf(pathSeparater);
??????? 
// String removedPath = path.substring(index);
???????
path.delete(index, path.length());
??? 
}
??? 
@Override
??? 
public
 
void
 startElement(String uri, String localName, String qName,
??????????? 
Attributes attributes) 
throws
 SAXException {
??????? 
foundItems.clear();
?
??????
if
 (count == 0) {
??????????? 
return
;
??????? 
}
??????? 
// update path
???????
addToPath(qName);
??????? 
List<XMLSearchUnit>
foundAttrItems = 
null
;
??????? 
// check if current node matches search units. if it is a
node value
??????? 
// search, then store it in a
member variable named foundItems because
??????? 
// the value of the node is known
only when reaches the end of the
??????? 
// node.but for attribute search,
it value is known here. So then are
??????? 
// put in a local variable list
named foundAttrItems.
???????
for
 (XMLSearchUnit unit : searchUnits) {
??????????? 
if
(unit.match(path.toString(), attributes) == 
true
) {
??????????????? 
if
(unit.getExpectedAttr() == 
null
) {
??????????????????? 
foundItems.add(unit);
??????????????? 
} 
else
 {
?
?????????????????? 
if
(foundAttrItems == 
null
) {
??????????????????????? 
foundAttrItems = 
new
 ArrayList<XMLSearchUnit>();
??????????????????? 
}
???????????????????
foundAttrItems.add(unit);
??????????????? 
}
??????????? 
}
??????? 
}
??????? 
// if no attribute match, return.
???????
if
 (foundAttrItems == 
null
) {
??????????? 
return
;
??????? 
}
??????? 
// fill search unit value using attribute value. update
count.
???????
for
 (XMLSearchUnit attrUnit : foundAttrItems) {
??????????? 
String attrValue =
attributes.getValue(attrUnit.getExpectedAttr());
??????????? 
if
(results.get(attrUnit) == 
null
) {
??????????????? 
count--;
??????????? 
}
??????????? 
results.put(attrUnit,
attrValue);
??????????? 
count--;
??????? 
}
??? 
}
??? 
/**
???? 
* if current node matches, the the
node value is useful, store it.
???? 
*/
??? 
@Override
??? 
public
 
void
 characters(
char
[] ch, 
int
start, 
int
 length)
??????????? 
throws
SAXException {
??????? 
if
(count == 0) {
??????????? 
return
;
??????? 
}
??????? 
if
(foundItems.size() == 0) {
?????????? 
?
return
;
??????? 
}
??????? 
for
(XMLSearchUnit unit : foundItems) {
??????????? 
String content = 
new
 String(ch, start, length);
??????????? 
if
(results.get(unit) == 
null
) {
??????????????? 
count--;
??????????? 
}
??????????? 
results.put(unit, content);
??? 
????
}
??? 
}
??? 
@Override
??? 
public
 
void
 endElement(String uri, String localName, String qName)
??????????? 
throws
SAXException {
??????? 
foundItems.clear();
??????? 
if
(count == 0) {
??????????? 
return
;
??????? 
}
??????? 
popPath();
??? 
}
}
?