sax、dom是兩種對xml文件進行解析的方法(沒有具體實現,只是介面),所以只有它們是無法解析xml文件的;jaxp只是api,它進一步封裝了sax、dom兩種介面,並且提供了DocumentBuilderFactory/DocumentBuilder和SAXParserFactory/SAXParser(預設使用xerces解析器)。
如對DOM解析器還有疑問,請檢視這裡。目前在Java中用於解析XML的技術很多,主流的有DOM、SAX、JDOM、DOM4j,下面分別介紹這四種方式如何操作XML。
university.xml
<?xml version="1.0" encoding="UTF-8"?> <university name="pku"> <college name="c1"> <class name="class1"> <student name="stu1" sex='male' age="21" /> <student name="stu2" sex='female' age="20" /> <student name="stu3" sex='female' age="20" /> </class> <class name="class2"> <student name="stu4" sex='male' age="19" /> <student name="stu5" sex='female' age="20" /> <student name="stu6" sex='female' age="21" /> </class> </college> <college name="c2"> <class name="class3"> <student name="stu7" sex='male' age="20" /> </class> </college> <college name="c3"> </college> </university>
dom讀寫xml 輸出屬性值,此方法我已經在上篇隨筆中詳細介紹過了,點這裡檢視。
TestDom.java
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.dom.Text;
import org.xml.sax.SAXException;

/**
 * Reads and rewrites the classpath resource {@code university.xml} with the JAXP DOM API.
 *
 * @author whwang
 */
public class TestDom {

    /** Classpath resource parsed by {@link #read()} and {@link #write()}. */
    private static final String SOURCE_RESOURCE = "university.xml";

    /**
     * Prints the {@code name} of the root element, then of every college and class, and
     * finally the {@code name}, {@code sex} and {@code age} of every student, to
     * {@code System.err}. Each nesting level is indented by one more tab.
     *
     * <p>Parsing and I/O failures (including a missing resource) are reported with
     * {@code printStackTrace()} and do not propagate.
     */
    public static void read() {
        try {
            Document doc = parseUniversity();
            Element root = doc.getDocumentElement();
            if (root == null) {
                return;
            }
            System.err.println(root.getAttribute("name"));
            for (Element college : childElements(root)) {
                System.err.println("\t" + college.getAttribute("name"));
                for (Element clazz : childElements(college)) {
                    System.err.println("\t\t" + clazz.getAttribute("name"));
                    for (Element student : childElements(clazz)) {
                        System.err.println("\t\t\t" + student.getAttribute("name")
                                + " " + student.getAttribute("sex")
                                + " " + student.getAttribute("age"));
                    }
                }
            }
        } catch (ParserConfigurationException e) {
            e.printStackTrace();
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        } catch (SAXException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Loads {@code university.xml}, edits it in memory and saves the result to
     * {@code src/dom-modify.xml}, printing the absolute path of that file to
     * {@code System.out} on success.
     *
     * <p>Edits applied: the root {@code name} becomes {@code "tsu"}; colleges {@code c1} and
     * {@code c2} are removed; college {@code c3} gains a {@code <class name="c4">} child; a
     * new {@code <college name="c5">} containing the text {@code "text"} is appended.
     *
     * <p>Failures are reported with {@code printStackTrace()} and do not propagate.
     */
    public static void write() {
        try {
            Document doc = parseUniversity();
            Element root = doc.getDocumentElement();
            if (root == null) {
                return;
            }
            root.setAttribute("name", "tsu");

            // childElements() returns a snapshot, so removing colleges below cannot shift
            // the indices of the nodes still to be visited (getChildNodes() is a live list).
            for (Element college : childElements(root)) {
                String collegeName = college.getAttribute("name");
                if ("c1".equals(collegeName) || "c2".equals(collegeName)) {
                    root.removeChild(college);
                } else if ("c3".equals(collegeName)) {
                    Element newChild = doc.createElement("class");
                    newChild.setAttribute("name", "c4");
                    college.appendChild(newChild);
                }
            }

            Element addCollege = doc.createElement("college");
            addCollege.setAttribute("name", "c5");
            root.appendChild(addCollege);
            Text text = doc.createTextNode("text");
            addCollege.appendChild(text);

            Transformer transformer = TransformerFactory.newInstance().newTransformer();
            File file = new File("src/dom-modify.xml");
            // FileOutputStream creates or truncates the target, so no delete/create dance is needed.
            FileOutputStream out = new FileOutputStream(file);
            try {
                transformer.transform(new DOMSource(doc), new StreamResult(out));
            } finally {
                out.close();
            }
            System.out.println(file.getAbsolutePath());
        } catch (ParserConfigurationException e) {
            e.printStackTrace();
        } catch (SAXException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } catch (TransformerConfigurationException e) {
            e.printStackTrace();
        } catch (TransformerException e) {
            e.printStackTrace();
        }
    }

    /** Runs the read demo; call {@link #write()} instead to produce the modified file. */
    public static void main(String[] args) {
        read();
    }

    /**
     * Parses {@link #SOURCE_RESOURCE} from the classpath and always closes the stream.
     *
     * @throws FileNotFoundException if the resource is not on the classpath
     */
    private static Document parseUniversity()
            throws ParserConfigurationException, SAXException, IOException {
        InputStream in = TestDom.class.getClassLoader().getResourceAsStream(SOURCE_RESOURCE);
        if (in == null) {
            // DocumentBuilder.parse(null) would throw an unchecked IllegalArgumentException.
            throw new FileNotFoundException(SOURCE_RESOURCE + " not found on the classpath");
        }
        try {
            DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
            return builder.parse(in);
        } finally {
            in.close();
        }
    }

    /** Returns a snapshot of the direct element children of {@code parent}, skipping text and comments. */
    private static List<Element> childElements(Node parent) {
        List<Element> elements = new ArrayList<Element>();
        NodeList children = parent.getChildNodes();
        for (int i = 0; i < children.getLength(); i++) {
            Node child = children.item(i);
            if (child.getNodeType() == Node.ELEMENT_NODE) {
                elements.add((Element) child);
            }
        }
        return elements;
    }
}
SAX不用將整個文件載入到記憶體,基於事件驅動的API(Observer模式),使用者只需要註冊自己感興趣的事件即可。SAX提供EntityResolver, DTDHandler, ContentHandler, ErrorHandler介面,分別用於監聽解析實體事件、DTD處理事件、正文處理事件和處理出錯事件,與AWT類似,SAX還提供了一個預設實現這4個介面的類DefaultHandler(這裡的預設實現,其實就是空方法),一般只要繼承DefaultHandler並重寫自己感興趣的事件處理方法即可。
TestSAX.java
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
import org.xml.sax.helpers.DefaultHandler;

/**
 * Streams the classpath resource {@code university.xml} through a SAX parser.
 *
 * @author whwang
 */
public class TestSAX {

    public static void main(String[] args) {
        read();
        write();
    }

    /**
     * Parses {@code university.xml} with a {@link MyHandler}, which prints every element and
     * its attributes to {@code System.err}.
     *
     * <p>Failures (including a missing resource) are reported with
     * {@code printStackTrace()} and do not propagate.
     */
    public static void read() {
        try {
            InputStream in = TestSAX.class.getClassLoader().getResourceAsStream("university.xml");
            if (in == null) {
                // SAXParser.parse(null, ...) would throw an unchecked IllegalArgumentException.
                throw new FileNotFoundException("university.xml not found on the classpath");
            }
            try {
                SAXParser parser = SAXParserFactory.newInstance().newSAXParser();
                parser.parse(in, new MyHandler());
            } finally {
                in.close();
            }
        } catch (ParserConfigurationException e) {
            e.printStackTrace();
        } catch (SAXException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** Prints a notice to {@code System.err}: SAX is a read-only API and cannot write XML. */
    public static void write() {
        System.err.println("純SAX對於寫操作無能為力");
    }
}

/**
 * SAX handler that overrides only the callbacks this demo cares about: document start/end
 * and element start. Every other {@link DefaultHandler} callback (entity resolution, DTD
 * events, prefix mappings, characters, warnings and errors) keeps its inherited behaviour.
 */
class MyHandler extends DefaultHandler {

    @Override
    public void startDocument() throws SAXException {
        System.err.println("開始解析文件");
    }

    @Override
    public void endDocument() throws SAXException {
        System.err.println("解析結束");
    }

    /** Prints {@code Element: <qName>, attr: [a = 1, b = 2]} on one line of {@code System.err}. */
    @Override
    public void startElement(String uri, String localName, String qName, Attributes attributes)
            throws SAXException {
        System.err.print("Element: " + qName + ", attr: ");
        print(attributes);
    }

    /**
     * Prints the attributes as a bracketed, comma-separated {@code name = value} list followed
     * by a line break. Prints nothing at all (not even the line break) when {@code attrs} is null.
     */
    private void print(Attributes attrs) {
        if (attrs == null) {
            return;
        }
        StringBuilder line = new StringBuilder("[");
        int count = attrs.getLength();
        for (int i = 0; i < count; i++) {
            if (i > 0) {
                line.append(", ");
            }
            line.append(attrs.getQName(i)).append(" = ").append(attrs.getValue(i));
        }
        line.append("]");
        System.err.println(line);
    }
}
JDOM與DOM非常類似,它是處理XML的純JAVA API,API大量使用了Collections類,且JDOM僅使用具體類而不使用介面。 JDOM 它自身不包含解析器。它通常使用 SAX2 解析器來解析和驗證輸入 XML 文件(儘管它還可以將以前構造的 DOM 表示作為輸入)。它包含一些轉換器以將 JDOM 表示輸出成 SAX2 事件流、DOM 模型或 XML 文字文件。
TestJDom.java
import java.io.File; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; import java.util.List; import org.jdom.Attribute; import org.jdom.Document; import org.jdom.Element; import org.jdom.JDOMException; import org.jdom.input.SAXBuilder; import org.jdom.output.XMLOutputter; /** * JDom讀寫xml * @author whwang */ public class TestJDom { public static void main(String[] args) { read(); write(); } public static void read() { try { boolean validate = false; SAXBuilder builder = new SAXBuilder(validate); InputStream in = TestJDom.class.getClassLoader().getResourceAsStream("university.xml"); Document doc = builder.build(in); // 獲取根節點 <university> Element root = doc.getRootElement(); readNode(root, ""); } catch (JDOMException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } } @SuppressWarnings("unchecked") public static void readNode(Element root, String prefix) { if (root == null) return; // 獲取屬性 List<Attribute> attrs = root.getAttributes(); if (attrs != null && attrs.size() > 0) { System.err.print(prefix); for (Attribute attr : attrs) { System.err.print(attr.getValue() + " "); } System.err.println(); } // 獲取他的子節點 List<Element> childNodes = root.getChildren(); prefix += "\t"; for (Element e : childNodes) { readNode(e, prefix); } } public static void write() { boolean validate = false; try { SAXBuilder builder = new SAXBuilder(validate); InputStream in = TestJDom.class.getClassLoader().getResourceAsStream("university.xml"); Document doc = builder.build(in); // 獲取根節點 <university> Element root = doc.getRootElement(); // 修改屬性 root.setAttribute("name", "tsu"); // 刪除 boolean isRemoved = root.removeChildren("college"); System.err.println(isRemoved); // 新增 Element newCollege = new Element("college"); newCollege.setAttribute("name", "new_college"); Element newClass = new Element("class"); newClass.setAttribute("name", "ccccc"); newCollege.addContent(newClass); root.addContent(newCollege); XMLOutputter out = new 
XMLOutputter(); File file = new File("src/jdom-modify.xml"); if (file.exists()) { file.delete(); } file.createNewFile(); FileOutputStream fos = new FileOutputStream(file); out.output(doc, fos); } catch (JDOMException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } } }
dom4j是目前在xml解析方面最優秀的(Hibernate、Sun的JAXM也都使用dom4j來解析XML),它合併了許多超出基本 XML 文件表示的功能,包括整合的 XPath 支援、XML Schema 支援以及用於大文件或流化文件的基於事件的處理。
TestDom4j.java
import java.io.File; import java.io.FileWriter; import java.io.IOException; import java.io.InputStream; import java.util.List; import org.dom4j.Attribute; import org.dom4j.Document; import org.dom4j.DocumentException; import org.dom4j.DocumentHelper; import org.dom4j.Element; import org.dom4j.ProcessingInstruction; import org.dom4j.VisitorSupport; import org.dom4j.io.SAXReader; import org.dom4j.io.XMLWriter; /** * Dom4j讀寫xml * @author whwang */ public class TestDom4j { public static void main(String[] args) { read1(); // read2(); write(); } public static void read1() { try { SAXReader reader = new SAXReader(); InputStream in = TestDom4j.class.getClassLoader().getResourceAsStream("university.xml"); Document doc = reader.read(in); Element root = doc.getRootElement(); readNode(root, ""); } catch (DocumentException e) { e.printStackTrace(); } } @SuppressWarnings("unchecked") public static void readNode(Element root, String prefix) { if (root == null) return; // 獲取節點的屬性 List<Attribute> attrs = root.attributes(); if (attrs != null && attrs.size() > 0) { System.err.print(prefix); for (Attribute attr : attrs) { System.err.print(attr.getValue() + " "); } System.err.println(); } // 獲取他的子節點 List<Element> childNodes = root.elements(); prefix += "\t"; for (Element e : childNodes) { readNode(e, prefix); } } public static void read2() { try { SAXReader reader = new SAXReader(); InputStream in = TestDom4j.class.getClassLoader().getResourceAsStream("university.xml"); Document doc = reader.read(in); doc.accept(new MyVistor()); } catch (DocumentException e) { e.printStackTrace(); } } /** * 寫入方法 */ public static void write() { try { // 建立一個xml文件 Document doc = DocumentHelper.createDocument(); Element university = doc.addElement("university"); university.addAttribute("name", "tsu"); // 註釋 university.addComment("這個是根節點"); Element college = university.addElement("college"); college.addAttribute("name", "cccccc"); college.setText("text"); File file = new File("src/dom4j-modify.xml"); if 
(file.exists()) { file.delete(); } file.createNewFile(); XMLWriter out = new XMLWriter(new FileWriter(file)); out.write(doc); out.flush(); out.close(); } catch (IOException e) { e.printStackTrace(); } } } class MyVistor extends VisitorSupport { public void visit(Attribute node) { System.out.println("Attibute: " + node.getName() + "=" + node.getValue()); } public void visit(Element node) { if (node.isTextOnly()) { System.out.println("Element: " + node.getName() + "=" + node.getText()); } else { System.out.println(node.getName()); } } @Override public void visit(ProcessingInstruction node) { System.out.println("PI:" + node.getTarget() + " " + node.getText()); } }
由於DOM4J比較重要,我還從網上整理了一些程式碼:
Dom4j.java
import java.io.File; import java.io.FileOutputStream; import java.io.FileWriter; import java.io.IOException; import java.io.OutputStream; import java.io.OutputStreamWriter; import java.io.UnsupportedEncodingException; import java.net.URL; import java.util.Iterator; import java.util.List; import org.dom4j.Attribute; import org.dom4j.Document; import org.dom4j.DocumentHelper; import org.dom4j.Element; import org.dom4j.Node; import org.dom4j.io.OutputFormat; import org.dom4j.io.SAXReader; import org.dom4j.io.XMLWriter; public class Dom4j { /** * 獲取Document物件 * 此方法檔案位於 專案根目錄 不是src目錄 * @param filename 專案根目錄下的XML檔案 * @return document * */ public static Document load(String filename) { Document document = null; try { SAXReader saxReader = new SAXReader(); document = saxReader.read(new File(filename)); //讀取XML檔案,獲得document物件 } catch (Exception ex) { ex.printStackTrace(); } return document; } /** * 通過url路徑獲取 Document物件 * 此方式 xml檔案位於遠端伺服器上 * @param url 遠端url檔案 * @return document物件 */ public static Document load2(URL url) { Document document = null; try { SAXReader saxReader = new SAXReader(); document = saxReader.read(url); //讀取XML檔案,獲得document物件 } catch (Exception ex) { ex.printStackTrace(); } return document; } /** * 獲取根節點 * @param doc document物件 * @return 根元素 */ public static Element getRootElement(Document doc){ Element root=null; root=doc.getRootElement(); //獲取根節點 return root; } /** * 將document樹輸出到指定的檔案 * @param document document物件 * @param filename 檔名 * @return 布林值 */ public static boolean doc2XmlFile(Document document, String filename) { boolean flag = true; try { XMLWriter writer = new XMLWriter( new OutputStreamWriter(new FileOutputStream(filename),"UTF-8")); writer.write(document); writer.close(); } catch (Exception ex) { flag = false; ex.printStackTrace(); } System.out.println(flag); return flag; } /** * * * 此方法在本類中無用 ,沒有整合 有興趣的可以自己動手整合一下 * * * Dom4j通過XMLWriter將Document物件表示的XML樹寫入指定的檔案, * 並使用OutputFormat格式物件指定寫入的風格和編碼方法。 * 
呼叫OutputFormat.createPrettyPrint()方法可以獲得一個預設的pretty print風格的格式物件。 * 對OutputFormat物件呼叫setEncoding()方法可以指定XML檔案的編碼方法。 * @param doc * @param out * @param encoding * @throws UnsupportedEncodingException * @throws IOException */ /*public void writeTo(Document doc,OutputStream out, String encoding) throws UnsupportedEncodingException, IOException { OutputFormat format = OutputFormat.createPrettyPrint(); format.setEncoding("gb2312"); XMLWriter writer = new XMLWriter(System.out,format); writer.write(doc); writer.flush(); }*/ /** * 遍歷根標記下的子元素 * @param args */ public static void read(Element root){ for(Iterator i=root.elementIterator();i.hasNext();){ Element element=(Element)i.next(); System.out.print(element.getName()+":"+element.getText()); if(element.getNodeType()==Node.ELEMENT_NODE){ read(element); } } } /** * 寫入操作 * @param fileName */ public static void write(String fileName){ Document document=DocumentHelper.createDocument();//建立document物件,用來操作xml檔案 Element booksElement=document.addElement("books");//建立根節點 booksElement.addComment("This is a test for dom4j ");//加入一行註釋 Element bookElement=booksElement.addElement("book");//新增一個book節點 bookElement.addAttribute("show","yes");//新增屬性內容 Element titleElement=bookElement.addElement("title");//新增文字節點 titleElement.setText("ajax in action");//新增文字內容 try{ XMLWriter writer=new XMLWriter(new FileWriter(new File(fileName))); writer.write(document); writer.close(); }catch(Exception e){ e.printStackTrace(); } } /** * 修改XML檔案 */ public static void modifyXMLFile() { String oldStr = "test.xml"; String newStr = "test1.xml"; Document document = null; //修改節點的屬性 try { SAXReader saxReader = new SAXReader(); // 用來讀取xml文件 document = saxReader.read(new File(oldStr)); // 讀取xml文件 List list = document.selectNodes("/books/book/@show");// 用xpath查詢節點book的屬性 Iterator iter = list.iterator(); while (iter.hasNext()) { Attribute attribute = (Attribute) iter.next(); if (attribute.getValue().equals("yes")) attribute.setValue("no"); } } catch (Exception e) { 
e.printStackTrace(); } //修改節點的內容 try { SAXReader saxReader = new SAXReader(); // 用來讀取xml文件 document = saxReader.read(new File(oldStr)); // 讀取xml文件 List list = document.selectNodes("/books/book/title");// 用xpath查詢節點book的內容 Iterator iter = list.iterator(); while (iter.hasNext()) { Element element = (Element) iter.next(); element.setText("xxx");// 設定相應的內容 } } catch (Exception e) { e.printStackTrace(); } try { XMLWriter writer = new XMLWriter(new FileWriter(new File(newStr))); writer.write(document); writer.close(); } catch (Exception ex) { ex.printStackTrace(); } } public static void main(String[] args){ Document doc=load("student.xml"); Element root=getRootElement(doc); read(root); write("test.xml"); modifyXMLFile(); } }
XPath 是一門在 XML 文件中查詢資訊的語言, 可用來在 XML 文件中對元素和屬性進行遍歷。XPath 是 W3C XSLT 標準的主要元素,並且 XQuery 和 XPointer 同時被構建於 XPath 表達之上。因此,對 XPath 的理解是很多高階 XML 應用的基礎。
XPath非常類似對資料庫操作的SQL語言,或者說jQuery,它可以方便開發者抓取文件中需要的東西。(dom4j也支援xpath, dom4j使用xpath請點選這裡)
TestXPath.java
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpression;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
import org.w3c.dom.Document;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;

/**
 * Queries a plain JAXP DOM document with XPath.
 *
 * @author licheng
 */
public class TestXPath {

    public static void main(String[] args) {
        read();
    }

    /**
     * Parses the classpath resource {@code university.xml} and prints the {@code name}
     * attribute of every {@code class} element to {@code System.out} as {@code name = value}.
     *
     * <p>Failures (including a missing resource) are reported with
     * {@code printStackTrace()} and do not propagate.
     */
    public static void read() {
        try {
            Document doc = parseUniversity();
            XPath xpath = XPathFactory.newInstance().newXPath();
            // Selects the name attribute of all class elements.
            // XPath syntax overview: http://w3school.com.cn/xpath/
            XPathExpression expr = xpath.compile("//class/@name");
            NodeList nodes = (NodeList) expr.evaluate(doc, XPathConstants.NODESET);
            for (int i = 0; i < nodes.getLength(); i++) {
                System.out.println("name = " + nodes.item(i).getNodeValue());
            }
        } catch (XPathExpressionException e) {
            e.printStackTrace();
        } catch (ParserConfigurationException e) {
            e.printStackTrace();
        } catch (SAXException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Parses the classpath resource and always closes the stream.
     *
     * @throws FileNotFoundException if the resource is not on the classpath
     */
    private static Document parseUniversity()
            throws ParserConfigurationException, SAXException, IOException {
        InputStream in = TestXPath.class.getClassLoader().getResourceAsStream("university.xml");
        if (in == null) {
            // DocumentBuilder.parse(null) would throw an unchecked IllegalArgumentException.
            throw new FileNotFoundException("university.xml not found on the classpath");
        }
        try {
            DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
            return builder.parse(in);
        } finally {
            in.close();
        }
    }
}
不知為何,部落格園每天只能在首頁發表一篇隨筆,發第二篇的時候就不能在首頁顯示。
利用JAVA,將XML檔案匯入資料庫,和將資料庫資訊匯入到XML的筆記將在明後兩天釋出。
最後將分享一個利用XML當資料庫,查詢英語六級詞彙的案例,下面先分享一下截圖吧:
例項將在明後兩天分享。請多多關注喲。