XML Parsing

This talk provides a quick overview of XML parsing in Java.

1 Parsing

2 Simple API for XML

2.1 SAX Sample Application

import java.io.FileReader;

import org.xml.sax.XMLReader;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.helpers.XMLReaderFactory;
import org.xml.sax.helpers.DefaultHandler;

/** Our handler must extend DefaultHandler */
public class MySAXApp extends DefaultHandler

  public static void main (String args[])
    throws Exception
    //Create an instance of the XML parser.
    XMLReader xr = XMLReaderFactory.createXMLReader();

    //Create an instance of our handler (see below)
    MySAXApp handler = new MySAXApp();
    xr.setErrorHandler(handler); //does double duty.

    // Parse each *file* provided on the
    // command line.
    for (int i = 0; i < args.length; i++) {
      FileReader r = new FileReader(args[i]);
      xr.parse(new InputSource(r));

  public MySAXApp ()

  // Event handlers.

  public void startDocument ()
    System.out.println("Start document");

  public void endDocument ()
    System.out.println("End document");

   * Receive notification at the beginning of an element.
   * @param uri The Namespace URI, or the empty string if the element
     has no Namespace URI or if Namespace processing is not being performed.
   * @param name The local name (without prefix), or the empty string
     if Namespace processing is not being performed.
   * @param qName The qualified name (with prefix), or the empty
     string if qualified names are not available. Form: namespaceprefix:name
   * @param atts The attributes attached to the element. If there are
     no attributes, it shall be an empty Attributes object.
  public void startElement (String uri, String name,
                            String qName, Attributes atts)
    if ("".equals (uri))
      System.out.println("Start element: " + qName);
      System.out.println("Start element: {" + uri + "}" + name);

  public void endElement (String uri, String name, String qName)
    if ("".equals (uri))
      System.out.println("End element: " + qName);
      System.out.println("End element:   {" + uri + "}" + name);

   * Receive notification of character data.
   * @param ch[] The characters from the XML document.
   * @param start The start position in the array.
   * @param length The number of characters to read from the array
  public void characters (char ch[], int start, int length)
    System.out.print("Characters:    \"");
    for (int i = start; i < start + length; i++) {
      switch (ch[i]) {
      case '\\':
      case '"':
      case '\n':
      case '\r':
      case '\t':


2.2 Application Output

<?xml version="1.0"?>

<poem xmlns="http://www.megginson.com/ns/exp/poetry">
  <title>Roses are Red</title>
  <l>Roses are red,</l>
  <l>Violets are blue;</l>
  <l>Sugar is sweet,</l>
  <l>And I love you.</l>

3 Document Object Model

3.1 DOM Structure

<?xml version="1.0" encoding="UTF-8"?>
    <customerid limit="1000">12341</customerid>
    <item instock="Y" itemid="SA15">
      <name>Silver Show Saddle, 16 inch</name>
    <item instock="N" itemid="C49">
      <name>Premium Cinch</name>
    <customerid limit="150">251222</customerid>
    <item instock="Y" itemid="WB78">
      <name>Winter Blanket (78 inch)</name>

DOM Tree

3.2 Node Types

3.3 DOM Parser

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import java.io.File;
import org.w3c.dom.Document;

/**
 * Minimal DOM example: parses the file "orders.xml" from the current
 * working directory into an in-memory {@link Document}.
 */
public class OrderProcessor {
  /**
   * Builds a DOM tree from orders.xml.
   *
   * @param args command-line arguments; not used
   */
  public static void main (String args[]) {
    File docFile = new File("orders.xml");
    Document doc = null;      
    try {
      //The factory hands out document builders.
      DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
      //A document builder builds specific documents.
      DocumentBuilder db = dbf.newDocumentBuilder();

      //This call parses the file and creates the Document in memory.
      doc = db.parse(docFile);
    } catch (Exception e) {
      //Any configuration, I/O or parse failure ends up here; doc stays null.
      System.out.print("Problem parsing the file.");
    }
  }
}

3.4 Traversing the Document

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import java.io.File;
import org.w3c.dom.*;

public class OrderProcessor {
  public static void main (String args[]) {

    //some code here
    doc = db.parse(docFile);

    //STEP 1:  Get the root element
    Element root = doc.getDocumentElement();
    System.out.println("The root element is "+root.getNodeName());
    //STEP 2:  Get the children
    NodeList children = root.getChildNodes();
    System.out.println("There are "+children.getLength()
                                  +" nodes in this document.");

    //STEP 3:  Step through the children
    for (Node child = root.getFirstChild(); 
         child != null;
         child = child.getNextSibling())
      //The node name is either the tag name or #text
      System.out.println(child.getNodeName()+" = "+child.getNodeValue());   

    //STEP 4:  Recurse this functionality


  private static void stepThrough (Node start)
    System.out.println(start.getNodeName()+" = "+start.getNodeValue());

    //If it's an element node, then print the attributes.
    if (start.getNodeType() == start.ELEMENT_NODE) 
      NamedNodeMap startAttr = start.getAttributes();
      for (int i = 0; 
           i < startAttr.getLength();
           i++) {
        Node attr = startAttr.item(i);
        System.out.println("  Attribute:  "+ attr.getNodeName()
                           +" = "+attr.getNodeValue());
    for (Node child = start.getFirstChild(); 
         child != null;
         child = child.getNextSibling())


3.5 Modifying a Document

public class OrderProcessor {

  /** Change the value of node elemName to elemValue */
  private static void changeOrder (Node start, 
                                   String elemName, 
                                   String elemValue)
    if (start.getNodeName().equals(elemName)) {
    for (Node child = start.getFirstChild(); 
         child != null;
         child = child.getNextSibling())
      changeOrder(child, elemName, elemValue);

  public static void main (String args[]) {
    // Change text value of node named status to processing.
    changeOrder(root, "status", "processing");

    //Get a list of nodes that have a status element.
    NodeList orders = root.getElementsByTagName("status");

    for (int orderNum = 0; 
         orderNum < orders.getLength(); 

      Element thisOrder = (Element)orders.item(orderNum);

      //Remove the limit attribute from customer.
      Element customer = (Element)thisOrder.getElementsByTagName("cusomertid").item(0);
      NodeList orderItems = thisOrder.getElementsByTagName("item");
      double total = 0;
      for (int itemNum = 0;
           itemNum < orderItems.getLength();
           itemNum++) {
        // Total up cost for each item and 
        // add to the order total
        //Get this item as an Element
        Element thisOrderItem = (Element)orderItems.item(itemNum);

        //Remove a node.
        //Remove anything with <item instock="N">
        if (thisOrderItem.getAttributeNode("instock").getNodeValue().equals("N")) {
          Node deadNode = thisOrderItem.getParentNode().removeChild(thisOrderItem);


          //Alternatively, we could have replaced this item with a backordered element.
          // <item itemid="123">
          //   <backordered></backordered>
          // </item>
          Element backElement = doc.createElement("backordered");

          //<backordered itemid="">

          //<backordered itemid="123">
          String itemIdString = thisOrderItem.getAttributeNode("itemid").getNodeValue();
          backElement.setAttribute("itemid", itemIdString);

          Node deadNode = thisOrderItem.getParentNode()
            .replaceChild(backElement, thisOrderItem);

        //Get pricing information for this Item
        String thisPrice = thisOrderItem.getElementsByTagName("price").item(0)
        double thisPriceDbl = new Double(thisPrice).doubleValue();
        //Get quantity information for this Item
        String thisQty = thisOrderItem.getElementsByTagName("qty").item(0)
        double thisQtyDbl = new Double(thisQty).doubleValue();

        double thisItemTotal = thisPriceDbl*thisQtyDbl;
        total = total + thisItemTotal;
      String totalString = new Double(total).toString();

      Node totalNode = doc.createTextNode(totalString);

      Element totalElement = doc.createElement("total");


      //Add that element before anyone else.
      thisOrder.insertBefore(totalElement, thisOrder.getFirstChild());


3.6 Outputting a Document

  File newFile = new File("processedOrders.xml");
  FileWriter newFileStream = new FileWriter(newFile);
  newFileStream.write("<?xml version=\"1.0\"?>");
  newFileStream.write("<!DOCTYPE "+doc.getDoctype().getName()+" ");
  if (doc.getDoctype().getSystemId() != null) 
    newFileStream.write(" SYSTEM ");
  if (doc.getDoctype().getPublicId() != null) 
    newFileStream.write(" PUBLIC ");


} catch (IOException e) {
  System.out.println("Can't write new file.");   

4 Validating

4.1 Doctype

4.2 Xerces Validation

  1. Create a parser.
  2. Turn on validation.
  3. Set the error handler.
  4. Parse the document.

4.3 Error Handler

import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.SAXParseException;

/** A simple error handler which just prints out
    the errors. */
public class ErrorChecker extends DefaultHandler
{
  /** Creates the handler; it keeps no state. */
  public ErrorChecker() {
  }

  /**
   * Prints the message of a parsing error to standard output.
   * @param e the exception describing the error
   */
  public void error (SAXParseException e) {
    System.out.println("Parsing error:  "+e.getMessage());
  }

  /**
   * Prints the message of a parser warning to standard output.
   * @param e the exception describing the warning
   */
  public void warning (SAXParseException e) {
    System.out.println("Parsing problem:  "+e.getMessage());
  }

  /**
   * Prints the message of a fatal parsing error, followed by a
   * "Cannot continue." line. The exception is not rethrown here.
   * @param e the exception describing the fatal error
   */
  public void fatalError (SAXParseException e) {
    System.out.println("Parsing error:  "+e.getMessage());
    System.out.println("Cannot continue.");
  }
}

4.4 Xerces Validator

import org.apache.xerces.parsers.DOMParser;
import java.io.File;
import org.w3c.dom.Document;

public class SchemaTest {
  public static void main (String args[]) {
    File docFile = new File("memory.xml");
    try {

      DOMParser parser = new DOMParser();
      parser.setFeature("http://xml.org/sax/features/validation", true); 

      //Here we specify the schema location,
      //but we could have used the ones specified in the document.
      ErrorChecker errors = new ErrorChecker();

    } catch (Exception e) {
      System.out.print("Problem parsing the file.");


