我想使用Java中的XPath读取XML数据,因此对于我所收集的信息,我无法根据我的要求解析XML。

这就是我想做的:

从网上通过其URL获取XML文件,然后使用XPath来解析它,我想在其中创建两个方法。一种情况是,我输入一个特定的节点属性id,结果得到所有的子节点,另一种情况是,假设我只想得到一个特定的子节点值

<?xml version="1.0"?>
<howto>
  <topic name="Java">
      <url>http://www.rgagnonjavahowto.htm</url>
  <car>taxi</car>
  </topic>
  <topic name="PowerBuilder">
       <url>http://www.rgagnon/pbhowto.htm</url>
       <url>http://www.rgagnon/pbhowtonew.htm</url>
  </topic>
  <topic name="Javascript">
        <url>http://www.rgagnon/jshowto.htm</url>
  </topic>
 <topic name="VBScript">
       <url>http://www.rgagnon/vbshowto.htm</url>
 </topic>
 </howto>

在上面的例子中,我想读取所有的元素,如果我通过@name搜索,还有一个函数,我只是想从@name 'Javascript'的url只返回一个节点元素。


当前回答

入门示例:

xml文件:

<inventory>
    <book year="2000">
        <title>Snow Crash</title>
        <author>Neal Stephenson</author>
        <publisher>Spectra</publisher>
        <isbn>0553380958</isbn>
        <price>14.95</price>
    </book>

    <book year="2005">
        <title>Burning Tower</title>
        <author>Larry Niven</author>
        <author>Jerry Pournelle</author>
        <publisher>Pocket</publisher>
        <isbn>0743416910</isbn>
        <price>5.99</price>
    </book>

    <book year="1995">
        <title>Zodiac</title>
        <author>Neal Stephenson</author>
        <publisher>Spectra</publisher>
        <isbn>0553573862</isbn>
        <price>7.50</price>
    </book>

    <!-- more books... -->

</inventory>

Java代码:

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;

import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;


try {

    DocumentBuilderFactory docBuilderFactory = DocumentBuilderFactory.newInstance();
    DocumentBuilder docBuilder = docBuilderFactory.newDocumentBuilder();
    Document doc = docBuilder.parse (new File("c:\\tmp\\my.xml"));

    // normalize text representation
    doc.getDocumentElement().normalize();
    System.out.println ("Root element of the doc is " + doc.getDocumentElement().getNodeName());

    NodeList listOfBooks = doc.getElementsByTagName("book");
    int totalBooks = listOfBooks.getLength();
    System.out.println("Total no of books : " + totalBooks);

    for(int i=0; i<listOfBooks.getLength() ; i++) {

        Node firstBookNode = listOfBooks.item(i);
        if(firstBookNode.getNodeType() == Node.ELEMENT_NODE) {

            Element firstElement = (Element)firstBookNode;                              
            System.out.println("Year :"+firstElement.getAttribute("year"));

            //-------
            NodeList firstNameList = firstElement.getElementsByTagName("title");
            Element firstNameElement = (Element)firstNameList.item(0);

            NodeList textFNList = firstNameElement.getChildNodes();
            System.out.println("title : " + ((Node)textFNList.item(0)).getNodeValue().trim());
        }
    }//end of for loop with s var
} catch (SAXParseException err) {
    System.out.println ("** Parsing error" + ", line " + err.getLineNumber () + ", uri " + err.getSystemId ());
    System.out.println(" " + err.getMessage ());
} catch (SAXException e) {
    Exception x = e.getException ();
    ((x == null) ? e : x).printStackTrace ();
} catch (Throwable t) {
    t.printStackTrace ();
}                

其他回答

下面是一个用vtd-xml处理xpath的例子…对于繁重的XML处理,它是首屈一指的。这是最近的一篇关于用Java处理XML -性能基准的论文

import com.ximpleware.*;

public class changeAttrVal {
    public  static  void main(String s[]) throws VTDException,java.io.UnsupportedEncodingException,java.io.IOException{
        VTDGen vg = new VTDGen();
        if (!vg.parseFile("input.xml", false))
            return;
        VTDNav vn = vg.getNav();
        AutoPilot ap = new AutoPilot(vn);
        XMLModifier xm = new XMLModifier(vn);
        ap.selectXPath("/*/place[@id=\"p14\" and   @initialMarking=\"2\"]/@initialMarking");
        int i=0;
        while((i=ap.evalXPath())!=-1){
            xm.updateToken(i+1, "499");// change initial marking from 2 to 499
        }
        xm.output("new.xml");
    }

}

这将向您展示如何

将XML文件读入DOM 用XPath过滤出一组节点 对每个提取的节点执行特定的操作。

我们将用下面的语句调用代码

processFilteredXml(xmlIn, xpathExpr,(node) -> {/*Do something...*/;});

在本例中,我们希望从book.xml中打印一些creatorName,使用"//book/creators/creator/creatorName"作为xpath,在每个匹配xpath的节点上执行printNode操作。

完整代码

@Test
public void printXml() {
    try (InputStream in = readFile("book.xml")) {
        processFilteredXml(in, "//book/creators/creator/creatorName", (node) -> {
            printNode(node, System.out);
        });
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}

private InputStream readFile(String yourSampleFile) {
    return Thread.currentThread().getContextClassLoader().getResourceAsStream(yourSampleFile);
}

private void processFilteredXml(InputStream in, String xpath, Consumer<Node> process) {
    Document doc = readXml(in);
    NodeList list = filterNodesByXPath(doc, xpath);
    for (int i = 0; i < list.getLength(); i++) {
        Node node = list.item(i);
        process.accept(node);
    }
}

public Document readXml(InputStream xmlin) {
    try {
        DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
        DocumentBuilder db = dbf.newDocumentBuilder();
        return db.parse(xmlin);
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}

private NodeList filterNodesByXPath(Document doc, String xpathExpr) {
    try {
        XPathFactory xPathFactory = XPathFactory.newInstance();
        XPath xpath = xPathFactory.newXPath();
        XPathExpression expr = xpath.compile(xpathExpr);
        Object eval = expr.evaluate(doc, XPathConstants.NODESET);
        return (NodeList) eval;
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}

private void printNode(Node node, PrintStream out) {
    try {
        Transformer transformer = TransformerFactory.newInstance().newTransformer();
        transformer.setOutputProperty(OutputKeys.INDENT, "yes");
        transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
        transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "2");
        StreamResult result = new StreamResult(new StringWriter());
        DOMSource source = new DOMSource(node);
        transformer.transform(source, result);
        String xmlString = result.getWriter().toString();
        out.println(xmlString);
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}

打印

<creatorName>Fosmire, Michael</creatorName>

<creatorName>Wertz, Ruth</creatorName>

<creatorName>Purzer, Senay</creatorName>

对于book.xml

<book>
  <creators>
    <creator>
      <creatorName>Fosmire, Michael</creatorName>
      <givenName>Michael</givenName>
      <familyName>Fosmire</familyName>
    </creator>
    <creator>
      <creatorName>Wertz, Ruth</creatorName>
      <givenName>Ruth</givenName>
      <familyName>Wertz</familyName>
    </creator>
    <creator>
      <creatorName>Purzer, Senay</creatorName>
       <givenName>Senay</givenName>
       <familyName>Purzer</familyName>
    </creator>
  </creators>
  <titles>
    <title>Critical Engineering Literacy Test (CELT)</title>
  </titles>
</book>

入门示例:

xml文件:

<inventory>
    <book year="2000">
        <title>Snow Crash</title>
        <author>Neal Stephenson</author>
        <publisher>Spectra</publisher>
        <isbn>0553380958</isbn>
        <price>14.95</price>
    </book>

    <book year="2005">
        <title>Burning Tower</title>
        <author>Larry Niven</author>
        <author>Jerry Pournelle</author>
        <publisher>Pocket</publisher>
        <isbn>0743416910</isbn>
        <price>5.99</price>
    </book>

    <book year="1995">
        <title>Zodiac</title>
        <author>Neal Stephenson</author>
        <publisher>Spectra</publisher>
        <isbn>0553573862</isbn>
        <price>7.50</price>
    </book>

    <!-- more books... -->

</inventory>

Java代码:

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;

import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;


try {

    DocumentBuilderFactory docBuilderFactory = DocumentBuilderFactory.newInstance();
    DocumentBuilder docBuilder = docBuilderFactory.newDocumentBuilder();
    Document doc = docBuilder.parse (new File("c:\\tmp\\my.xml"));

    // normalize text representation
    doc.getDocumentElement().normalize();
    System.out.println ("Root element of the doc is " + doc.getDocumentElement().getNodeName());

    NodeList listOfBooks = doc.getElementsByTagName("book");
    int totalBooks = listOfBooks.getLength();
    System.out.println("Total no of books : " + totalBooks);

    for(int i=0; i<listOfBooks.getLength() ; i++) {

        Node firstBookNode = listOfBooks.item(i);
        if(firstBookNode.getNodeType() == Node.ELEMENT_NODE) {

            Element firstElement = (Element)firstBookNode;                              
            System.out.println("Year :"+firstElement.getAttribute("year"));

            //-------
            NodeList firstNameList = firstElement.getElementsByTagName("title");
            Element firstNameElement = (Element)firstNameList.item(0);

            NodeList textFNList = firstNameElement.getChildNodes();
            System.out.println("title : " + ((Node)textFNList.item(0)).getNodeValue().trim());
        }
    }//end of for loop with s var
} catch (SAXParseException err) {
    System.out.println ("** Parsing error" + ", line " + err.getLineNumber () + ", uri " + err.getSystemId ());
    System.out.println(" " + err.getMessage ());
} catch (SAXException e) {
    Exception x = e.getException ();
    ((x == null) ? e : x).printStackTrace ();
} catch (Throwable t) {
    t.printStackTrace ();
}                

使用XPathFactory、SAXParserFactory和StAX (JSR-173)读取XML文件。

使用XPath获取节点及其子数据。

public static void main(String[] args) {
    String xml = "<soapenv:Body xmlns:soapenv='http://schemas.xmlsoap.org/soap/envelope/'>"
            + "<Yash:Data xmlns:Yash='http://Yash.stackoverflow.com/Services/Yash'>"
            + "<Yash:Tags>Java</Yash:Tags><Yash:Tags>Javascript</Yash:Tags><Yash:Tags>Selenium</Yash:Tags>"
            + "<Yash:Top>javascript</Yash:Top><Yash:User>Yash-777</Yash:User>"
            + "</Yash:Data></soapenv:Body>";
    String jsonNameSpaces = "{'soapenv':'http://schemas.xmlsoap.org/soap/envelope/',"
            + "'Yash':'http://Yash.stackoverflow.com/Services/Yash'}";
    String xpathExpression = "//Yash:Data";

    Document doc1 = getDocument(false, "fileName", xml);
    getNodesFromXpath(doc1, xpathExpression, jsonNameSpaces);
    System.out.println("\n===== ***** =====");
    Document doc2 = getDocument(true, "./books.xml", xml);
    getNodesFromXpath(doc2, "//person", "{}");
}
static Document getDocument( boolean isFileName, String fileName, String xml ) {
    Document doc = null;
    try {

        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        factory.setValidating(false);
        factory.setNamespaceAware(true);
        factory.setIgnoringComments(true);
        factory.setIgnoringElementContentWhitespace(true);

        DocumentBuilder builder = factory.newDocumentBuilder();
        if( isFileName ) {
            File file = new File( fileName );
            FileInputStream stream = new FileInputStream( file );
            doc = builder.parse( stream );
        } else {
            doc = builder.parse( string2Source( xml ) );
        }
    } catch (SAXException | IOException e) {
        e.printStackTrace();
    } catch (ParserConfigurationException e) {
        e.printStackTrace();
    }
    return doc;
}

/**
 * ELEMENT_NODE[1],ATTRIBUTE_NODE[2],TEXT_NODE[3],CDATA_SECTION_NODE[4],
 * ENTITY_REFERENCE_NODE[5],ENTITY_NODE[6],PROCESSING_INSTRUCTION_NODE[7],
 * COMMENT_NODE[8],DOCUMENT_NODE[9],DOCUMENT_TYPE_NODE[10],DOCUMENT_FRAGMENT_NODE[11],NOTATION_NODE[12]
 */
public static void getNodesFromXpath( Document doc, String xpathExpression, String jsonNameSpaces ) {
    try {
        XPathFactory xpf = XPathFactory.newInstance();
        XPath xpath = xpf.newXPath();

        JSONObject namespaces = getJSONObjectNameSpaces(jsonNameSpaces);
        if ( namespaces.size() > 0 ) {
            NamespaceContextImpl nsContext = new NamespaceContextImpl();

            Iterator<?> key = namespaces.keySet().iterator();
            while (key.hasNext()) { // Apache WebServices Common Utilities
                String pPrefix = key.next().toString();
                String pURI = namespaces.get(pPrefix).toString();
                nsContext.startPrefixMapping(pPrefix, pURI);
            }
            xpath.setNamespaceContext(nsContext );
        }

        XPathExpression compile = xpath.compile(xpathExpression);
        NodeList nodeList = (NodeList) compile.evaluate(doc, XPathConstants.NODESET);
        displayNodeList(nodeList);
    } catch (XPathExpressionException e) {
        e.printStackTrace();
    }
}

static void displayNodeList( NodeList nodeList ) {
    for (int i = 0; i < nodeList.getLength(); i++) {
        Node node = nodeList.item(i);
        String NodeName = node.getNodeName();

        NodeList childNodes = node.getChildNodes();
        if ( childNodes.getLength() > 1 ) {
            for (int j = 0; j < childNodes.getLength(); j++) {

                Node child = childNodes.item(j);
                short nodeType = child.getNodeType();
                if ( nodeType == 1 ) {
                    System.out.format( "\n\t Node Name:[%s], Text[%s] ", child.getNodeName(), child.getTextContent() );
                }
            }
        } else {
            System.out.format( "\n Node Name:[%s], Text[%s] ", NodeName, node.getTextContent() );
        }

    }
}
static InputSource string2Source( String str ) {
    InputSource inputSource = new InputSource( new StringReader( str ) );
    return inputSource;
}
static JSONObject getJSONObjectNameSpaces( String jsonNameSpaces ) {
    if(jsonNameSpaces.indexOf("'") > -1)    jsonNameSpaces = jsonNameSpaces.replace("'", "\"");
    JSONParser parser = new JSONParser();
    JSONObject namespaces = null;
    try {
        namespaces = (JSONObject) parser.parse(jsonNameSpaces);
    } catch (ParseException e) {
        e.printStackTrace();
    }
    return namespaces;
}

XML文档

<?xml version="1.0" encoding="UTF-8"?>
<book>
<person>
  <first>Yash</first>
  <last>M</last>
  <age>22</age>
</person>
<person>
  <first>Bill</first>
  <last>Gates</last>
  <age>46</age>
</person>
<person>
  <first>Steve</first>
  <last>Jobs</last>
  <age>40</age>
</person>
</book>

对于给定的xpatheexpression输出:

String xpathExpression = "//person/first";
/*OutPut:
 Node Name:[first], Text[Yash] 
 Node Name:[first], Text[Bill] 
 Node Name:[first], Text[Steve] */

String xpathExpression = "//person";
/*OutPut:
     Node Name:[first], Text[Yash] 
     Node Name:[last], Text[M] 
     Node Name:[age], Text[22] 
     Node Name:[first], Text[Bill] 
     Node Name:[last], Text[Gates] 
     Node Name:[age], Text[46] 
     Node Name:[first], Text[Steve] 
     Node Name:[last], Text[Jobs] 
     Node Name:[age], Text[40] */

String xpathExpression = "//Yash:Data";
/*OutPut:
     Node Name:[Yash:Tags], Text[Java] 
     Node Name:[Yash:Tags], Text[Javascript] 
     Node Name:[Yash:Tags], Text[Selenium] 
     Node Name:[Yash:Top], Text[javascript] 
     Node Name:[Yash:User], Text[Yash-777] */

请参阅此链接了解我们自己的NamespaceContext实现

如果你有一个像下面这样的xml

<e:Envelope
    xmlns:d = "http://www.w3.org/2001/XMLSchema"
    xmlns:e = "http://schemas.xmlsoap.org/soap/envelope/"
    xmlns:wn0 = "http://systinet.com/xsd/SchemaTypes/"
    xmlns:i = "http://www.w3.org/2001/XMLSchema-instance">
    <e:Header>
        <Friends>
            <friend>
                <Name>Testabc</Name>
                <Age>12121</Age>
                <Phone>Testpqr</Phone>
            </friend>
        </Friends>
    </e:Header>
    <e:Body>
        <n0:ForAnsiHeaderOperResponse xmlns:n0 = "http://systinet.com/wsdl/com/magicsoftware/ibolt/localhost/ForAnsiHeader/ForAnsiHeaderImpl#ForAnsiHeaderOper?KExqYXZhL2xhbmcvU3RyaW5nOylMamF2YS9sYW5nL1N0cmluZzs=">
            <response i:type = "d:string">12--abc--pqr</response>
        </n0:ForAnsiHeaderOperResponse>
    </e:Body>
</e:Envelope>

并希望提取下面的XML

<e:Header>
   <Friends>
      <friend>
         <Name>Testabc</Name>
         <Age>12121</Age>
         <Phone>Testpqr</Phone>
      </friend>
   </Friends>
</e:Header>

下面的代码有助于实现同样的目标

public static void main(String[] args) {

    File fXmlFile = new File("C://Users//abhijitb//Desktop//Test.xml");
    DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
    Document document;
    Node result = null;
    try {
        document = dbf.newDocumentBuilder().parse(fXmlFile);
        XPath xPath = XPathFactory.newInstance().newXPath();
        String xpathStr = "//Envelope//Header";
        result = (Node) xPath.evaluate(xpathStr, document, XPathConstants.NODE);
        System.out.println(nodeToString(result));
    } catch (SAXException | IOException | ParserConfigurationException | XPathExpressionException
            | TransformerException e) {
        e.printStackTrace();
    }
}

private static String nodeToString(Node node) throws TransformerException {
    StringWriter buf = new StringWriter();
    Transformer xform = TransformerFactory.newInstance().newTransformer();
    xform.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
    xform.transform(new DOMSource(node), new StreamResult(buf));
    return (buf.toString());
}

现在,如果您只需要如下所示的xml

<Friends>
   <friend>
      <Name>Testabc</Name>
      <Age>12121</Age>
      <Phone>Testpqr</Phone>
   </friend>
</Friends>

你需要改变

String xpathStr = "//信封//头";to String xpathStr = "//信封//头/*";