Dom4j SAXReader Constructors

简介: Dom4j读取xml:eg1: package xml; import java.io.File; import org.dom4j.DocumentException; import org.

 

Dom4j读取xml:
eg1:

package xml;

import java.io.File;

import org.dom4j.DocumentException;
import org.dom4j.io.SAXReader;

/**
 * Small demo that feeds the same sample document to dom4j's
 * {@code SAXReader} from several Windows locations, once as a plain
 * {@code String} and once as a {@code File}, and prints how each attempt ends.
 */
public class XmlReader_Dom4j {

    /**
     * Runs the four read attempts in a fixed order.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String location = "D:\\test\\中文文件夹名\\namespaces.xml";
        // String overload: this attempt fails; the exception is caught and printed by readXml
        readXml(location);
        // File overload pointed at the very same location
        readXml(new File(location));

        location = "D:\\test\\path withWhiteSpace\\namespaces.xml";
        readXml(location);

        location = "D:\\test\\normal\\namespaces.xml";
        readXml(location);
    }

    /**
     * Parses the document named by {@code path} and prints "success", or
     * prints the stack trace of the {@code DocumentException} on failure.
     *
     * @param path location handed to {@code SAXReader.read(String)}
     */
    private static void readXml(String path) {
        try {
            new SAXReader().read(path);
            System.out.println("success");
        } catch (DocumentException parseFailure) {
            parseFailure.printStackTrace();
        }
    }

    /**
     * Parses the given file and prints "success", or prints the stack trace
     * of the {@code DocumentException} on failure.
     *
     * @param xmlFile file handed to {@code SAXReader.read(File)}
     */
    private static void readXml(File xmlFile) {
        try {
            new SAXReader().read(xmlFile);
            System.out.println("success");
        } catch (DocumentException parseFailure) {
            parseFailure.printStackTrace();
        }
    }

}

Output:

org.dom4j.DocumentException: unknown protocol: d Nested exception: unknown protocol: d
    at org.dom4j.io.SAXReader.read(SAXReader.java:484)
    at org.dom4j.io.SAXReader.read(SAXReader.java:321)
    at xml.XmlReader_Dom4j.readXml(XmlReader_Dom4j.java:24)
    at xml.XmlReader_Dom4j.main(XmlReader_Dom4j.java:11)
Nested exception: 
java.net.MalformedURLException: unknown protocol: d
    at java.net.URL.<init>(Unknown Source)
    at java.net.URL.<init>(Unknown Source)
    at java.net.URL.<init>(Unknown Source)
    at com.sun.org.apache.xerces.internal.impl.XMLEntityManager.setupCurrentEntity(Unknown Source)
    at com.sun.org.apache.xerces.internal.impl.XMLVersionDetector.determineDocVersion(Unknown Source)
    at com.sun.org.apache.xerces.internal.parsers.XML11Configuration.parse(Unknown Source)
    at com.sun.org.apache.xerces.internal.parsers.XML11Configuration.parse(Unknown Source)
    at com.sun.org.apache.xerces.internal.parsers.XMLParser.parse(Unknown Source)
    at com.sun.org.apache.xerces.internal.parsers.AbstractSAXParser.parse(Unknown Source)
    at com.sun.org.apache.xerces.internal.jaxp.SAXParserImpl$JAXPSAXParser.parse(Unknown Source)
    at org.dom4j.io.SAXReader.read(SAXReader.java:465)
    at org.dom4j.io.SAXReader.read(SAXReader.java:321)
    at xml.XmlReader_Dom4j.readXml(XmlReader_Dom4j.java:24)
    at xml.XmlReader_Dom4j.main(XmlReader_Dom4j.java:11)
Nested exception: java.net.MalformedURLException: unknown protocol: d
    at java.net.URL.<init>(Unknown Source)
    at java.net.URL.<init>(Unknown Source)
    at java.net.URL.<init>(Unknown Source)
    at com.sun.org.apache.xerces.internal.impl.XMLEntityManager.setupCurrentEntity(Unknown Source)
    at com.sun.org.apache.xerces.internal.impl.XMLVersionDetector.determineDocVersion(Unknown Source)
    at com.sun.org.apache.xerces.internal.parsers.XML11Configuration.parse(Unknown Source)
    at com.sun.org.apache.xerces.internal.parsers.XML11Configuration.parse(Unknown Source)
    at com.sun.org.apache.xerces.internal.parsers.XMLParser.parse(Unknown Source)
    at com.sun.org.apache.xerces.internal.parsers.AbstractSAXParser.parse(Unknown Source)
    at com.sun.org.apache.xerces.internal.jaxp.SAXParserImpl$JAXPSAXParser.parse(Unknown Source)
    at org.dom4j.io.SAXReader.read(SAXReader.java:465)
    at org.dom4j.io.SAXReader.read(SAXReader.java:321)
    at xml.XmlReader_Dom4j.readXml(XmlReader_Dom4j.java:24)
    at xml.XmlReader_Dom4j.main(XmlReader_Dom4j.java:11)
success
success
success

 

Source code:

    /**
     * <p>
     * Reads a Document from the given URL or file name using SAX.
     * </p>
     * 
     * <p>
     * The string is wrapped in an {@link InputSource} as its system id
     * exactly as given; this method does not rewrite or inspect it. Per the
     * documented contract, a system id containing a <code>':'</code>
     * character is assumed to be a URL, otherwise it is assumed to be a file
     * name, so a path with a drive specifier (for example
     * <code>D:\dir\file.xml</code>) may be rejected as an unknown protocol.
     * For finer-grained control over this mechanism, pass a {@link URL} or a
     * {@link File} instance instead of a {@link String}.
     * </p>
     * 
     * @param systemId
     *            is a URL for a document or a file name.
     * 
     * @return the newly created Document instance
     * 
     * @throws DocumentException
     *             if an error occurs during parsing.
     */
    public Document read(String systemId) throws DocumentException {
        final InputSource input = new InputSource(systemId);

        // Only override the source encoding when one was configured on this reader.
        final String configuredEncoding = this.encoding;
        if (configuredEncoding != null) {
            input.setEncoding(configuredEncoding);
        }

        return read(input);
    }

 

eg2:

    /**
     * Runs the URL experiment between two banner lines: first a
     * {@code file:///} prefixed location is passed to both {@code newUrl} and
     * {@code readXml}, then the bare drive-letter path is passed to
     * {@code newUrl} only.
     *
     * @throws MalformedURLException declared because {@code newUrl} declares it
     */
    private static void testWithUrl() throws MalformedURLException {
        System.out.println("=============testWithUrlBegin=============");

        final String fileUrl = "file:///D:\\test\\中文文件夹名\\namespaces.xml";
        newUrl(fileUrl);
        readXml(fileUrl);

        final String barePath = "D:\\test\\中文文件夹名\\namespaces.xml";
        newUrl(barePath);

        System.out.println("=============testWithUrlEnd=============");
    }

    /**
     * Tries to construct a {@link URL} from the given text and prints the
     * stack trace when the text is not accepted as a URL.
     *
     * @param path text to interpret as a URL
     * @throws MalformedURLException never actually propagated, because the
     *         failure is caught and printed here; the clause is kept so that
     *         existing callers compile unchanged
     */
    private static void newUrl(String path) throws MalformedURLException {
        try {
            new URL(path);
        } catch (MalformedURLException e) {
            // Catch only the failure the URL constructor declares, so that
            // unrelated programming errors are not silently absorbed here.
            e.printStackTrace();
        }
    }

    /**
     * Parses the document named by {@code path} with a fresh
     * {@code SAXReader}, then prints whether the parsed document has content
     * followed by "success". A {@code DocumentException} is printed instead of
     * being propagated.
     *
     * @param path URL or file name handed to {@code SAXReader.read(String)}
     */
    private static void readXml(String path) {
        final SAXReader reader = new SAXReader();
        final Document parsed;
        try {
            parsed = reader.read(path);
        } catch (DocumentException failure) {
            failure.printStackTrace();
            return;
        }
        System.out.println("document.hasContent():" + parsed.hasContent());
        System.out.println("success");
    }

 

Output:

=============testWithUrlBegin=============
document.hasContent():true
success
java.net.MalformedURLException: unknown protocol: d
    at java.net.URL.<init>(Unknown Source)
    at java.net.URL.<init>(Unknown Source)
    at java.net.URL.<init>(Unknown Source)
    at xml.XmlReader_Dom4j.newUrl(XmlReader_Dom4j.java:50)
    at xml.XmlReader_Dom4j.testWithUrl(XmlReader_Dom4j.java:43)
    at xml.XmlReader_Dom4j.main(XmlReader_Dom4j.java:13)
=============testWithUrlEnd=============

 

saxReader.read(xmlFile)不报错的原因:read(File)直接用FileInputStream打开文件,并自行拼出以file://开头的systemId,而不是把原始路径字符串当作URL交给解析器:

    /**
     * <p>
     * Reads a Document from the given <code>File</code>.
     * </p>
     * 
     * <p>
     * The file content is supplied to the parser as a byte stream, and a
     * <code>file://</code> system id derived from the absolute path is
     * attached to the source.
     * </p>
     * 
     * @param file
     *            is the <code>File</code> to read from.
     * 
     * @return the newly created Document instance
     * 
     * @throws DocumentException
     *             if the file cannot be opened or an error occurs during
     *             parsing.
     */
    public Document read(File file) throws DocumentException {
        try {
            /*
             * The file is opened as a stream rather than converted to a URL:
             * a '#' in the file name would otherwise cause problems with the
             * URL in the InputSource (http://myhost.com/index#anchor is
             * treated the same as http://myhost.com/index). Thanks to
             * Christian Oetterli
             */
            final InputSource input = new InputSource(new FileInputStream(file));
            if (this.encoding != null) {
                input.setEncoding(this.encoding);
            }

            final String absolutePath = file.getAbsolutePath();
            if (absolutePath != null) {
                // Code taken from Ant FileUtils.
                // Paths that do not begin with the separator (filesystems with
                // drive-specifiers) need one extra slash after "file://".
                final String prefix = absolutePath.startsWith(File.separator)
                        ? "file://"
                        : "file:///";
                input.setSystemId(prefix + absolutePath.replace('\\', '/'));
            }

            return read(input);
        } catch (FileNotFoundException e) {
            throw new DocumentException(e.getMessage(), e);
        }
    }

 

java.net.URL(URL.java)中抛出该异常的位置:

    /**
     * Creates a <code>URL</code> object from the specified
     * <code>protocol</code>, <code>host</code>, <code>port</code>
     * number, <code>file</code>, and <code>handler</code>. Specifying
     * a <code>port</code> number of <code>-1</code> indicates that
     * the URL should use the default port for the protocol. Specifying
     * a <code>handler</code> of <code>null</code> indicates that the URL
     * should use a default stream handler for the protocol, as outlined
     * for:
     *     java.net.URL#URL(java.lang.String, java.lang.String, int,
     *                      java.lang.String)
     *
     * <p>If the handler is not null and there is a security manager,
     * the security manager's <code>checkPermission</code>
     * method is called with a
     * <code>NetPermission("specifyStreamHandler")</code> permission.
     * This may result in a SecurityException.
     *
     * No validation of the inputs is performed by this constructor.
     *
     * @param      protocol   the name of the protocol to use.
     * @param      host       the name of the host.
     * @param      port       the port number on the host.
     * @param      file       the file on the host
     * @param       handler    the stream handler for the URL.
     * @exception  MalformedURLException  if an unknown protocol is specified.
     * @exception  SecurityException
     *        if a security manager exists and its
     *        <code>checkPermission</code> method doesn't allow
     *        specifying a stream handler explicitly.
     * @see        java.lang.System#getProperty(java.lang.String)
     * @see        java.net.URL#setURLStreamHandlerFactory(
     *            java.net.URLStreamHandlerFactory)
     * @see        java.net.URLStreamHandler
     * @see        java.net.URLStreamHandlerFactory#createURLStreamHandler(
     *            java.lang.String)
     * @see        SecurityManager#checkPermission
     * @see        java.net.NetPermission
     */
    public URL(String protocol, String host, int port, String file,
           URLStreamHandler handler) throws MalformedURLException {
    // A caller-supplied handler is only accepted after the security manager,
    // when one is installed, has been consulted.
    if (handler != null) {
            SecurityManager sm = System.getSecurityManager();
            if (sm != null) {
                // check for permission to specify a handler
                checkSpecifyHandler(sm);
            }
        }

    // The protocol is lower-cased once; the stored field and the handler
    // lookup at the end both use this form. A null protocol fails here with
    // a NullPointerException.
    protocol = protocol.toLowerCase();
        this.protocol = protocol;
     // host, port and authority are only assigned when a host was supplied.
     if (host != null) {

            /**
         * if host is a literal IPv6 address,
             * we will make it conform to RFC 2732
         */
            // NOTE(review): the host != null test below is redundant inside
            // the enclosing if (host != null).
            if (host != null && host.indexOf(':') >= 0
                    && !host.startsWith("[")) {
                host = "["+host+"]";
            }
            this.host = host;

        // -1 is the "use the protocol's default port" marker; anything
        // smaller is rejected.
        if (port < -1) {
        throw new MalformedURLException("Invalid port number :" +
                                                    port);
        }
            this.port = port;
        // The authority omits the port when the default-port marker is used.
        authority = (port == -1) ? host : host + ":" + port;
    }

    // Parts (declared elsewhere) supplies the path, query and ref obtained
    // from the file argument.
    Parts parts = new Parts(file);
        path = parts.getPath();
        query = parts.getQuery();

        // this.file is the path plus "?query" when a query is present.
        if (query != null) {
            this.file = path + "?" + query;
        } else {
            this.file = path;
        }
    ref = parts.getRef();    

    // Note: we don't do validation of the URL here. Too risky to change
        // right now, but worth considering for future reference. -br
        // With no explicit handler, one is looked up by protocol name; a
        // failed lookup is what produces the "unknown protocol: ..." error.
        if (handler == null &&
            (handler = getURLStreamHandler(protocol)) == null) {
            throw new MalformedURLException("unknown protocol: " + protocol);
        }
        this.handler = handler;
    }

 

相关文章
|
XML Java 数据格式
|
3月前
|
XML JavaScript 数据格式
XML DOM 遍历节点树
XML DOM 遍历节点树
|
3月前
|
JavaScript
DOM 节点列表长度(Node List Length)
DOM 节点列表长度(Node List Length)
|
3月前
|
JavaScript
HTML DOM 节点树
HTML DOM 节点是指在 HTML 文档对象模型中,文档中的所有内容都被视为节点。整个文档是一个文档节点,每个 HTML 元素是元素节点,元素内的文本是文本节点,属性是属性节点,注释是注释节点。DOM 将文档表示为节点树,节点之间有父子和同胞关系。
|
3月前
|
JavaScript
HTML DOM 节点
HTML DOM(文档对象模型)将HTML文档视为节点树,其中每个部分都是节点:文档本身是文档节点,HTML元素是元素节点,元素内的文本是文本节点,属性是属性节点,注释是注释节点。节点间存在父子及同胞关系,形成层次结构。
|
3月前
|
XML JavaScript 数据格式
XML DOM 遍历节点树
XML DOM 遍历节点树
|
3月前
|
JavaScript
DOM 节点列表长度(Node List Length)
DOM 节点列表长度(Node List Length)
|
3月前
|
JavaScript
DOM 节点列表长度(Node List Length)
DOM 节点列表长度(Node List Length)
|
3月前
|
JavaScript
DOM 节点列表长度(Node List Length)
DOM 节点列表长度(Node List Length)
|
3月前
|
JavaScript
DOM 节点列表长度(Node List Length)
DOM 节点列表长度(Node List Length)