Monday, June 28, 2010

XML Parsing with Namespaces via PL/SQL

My previous post covered the basics for parsing simple XML in PL/SQL, but most XML has one or more namespaces associated with it. So how do you deal with that? Simple pretty much. Oracle provides a third parm (usually it is the third) to pass namespace URI information along to the underlying parser. I've taken the previous XML and added two namespaces to it and updated the appropriate calls to make this example.
DECLARE
l_doc VARCHAR2(2000);
l_domdoc dbms_xmldom.DOMDocument;
l_nodelist dbms_xmldom.DOMNodeList;
l_node dbms_xmldom.DOMNode;
l_value VARCHAR2(30);
l_a_ns VARCHAR2(30);
l_both_ns VARCHAR2(60);

l_xmltype XMLTYPE;
l_empx XMLTYPE;
l_index PLS_INTEGER;
l_col_ind PLS_INTEGER;
BEGIN
l_doc := '<a:employees xmlns:a="abc.com/123" xmlns:b="xyz.net/456">
<a:emp>
<a:name>Scott</a:name>
<b:favorites>
<b:color>red</b:color>
<b:color>orange</b:color>
</b:favorites>
</a:emp>
<a:emp>
<a:name>John</a:name>
<b:favorites>
<b:color>blue</b:color>
<b:color>green</b:color>
</b:favorites>
</a:emp>
</a:employees>';

l_domdoc := dbms_xmldom.newDomDocument(l_doc);
l_a_ns := 'xmlns:a="abc.com/123"';
l_both_ns := l_a_ns || ' xmlns:b="xyz.net/456"'; -- separated by a space

-- Method 1
dbms_output.put_line('Method 1');
-- third parm for namespace info
l_nodelist := dbms_xslprocessor.selectNodes(dbms_xmldom.makeNode(l_domdoc),'/a:employees/a:emp/a:name', l_a_ns);
FOR cur_emp IN 0 .. dbms_xmldom.getLength(l_nodelist) - 1 LOOP
l_node := dbms_xmldom.item(l_nodelist, cur_emp);
l_value := dbms_xmldom.getnodevalue(dbms_xmldom.getfirstchild(l_node));
dbms_output.put_line('Emp Name: '||l_value);
END LOOP;

-- Method 2
dbms_output.new_line;
dbms_output.put_line('Method 2');
l_nodelist := dbms_xmldom.getelementsbytagname(l_domdoc, 'name'); -- This doesn't care about namespaces
-- get first item from list, could loop as shown above
l_node := dbms_xmldom.item(l_nodelist, 0);
l_value := dbms_xmldom.getnodevalue(dbms_xmldom.getfirstchild(l_node));
dbms_output.put_line('Emp Name: '||l_value);

-- Done with DOMDocument examples, setup for XMLType based examples
dbms_xmldom.freeDocument(l_domdoc);
l_xmltype := XMLTYPE(l_doc);

-- Method 3
dbms_output.new_line;
dbms_output.put_line('Method 3');
l_index := 1;
WHILE l_xmltype.Existsnode('/a:employees/a:emp[' || To_Char(l_index) || ']', l_a_ns) > 0
LOOP
l_value := l_xmltype.extract('/a:employees/a:emp[' || To_Char(l_index) || ']/a:name/text()', l_a_ns).getStringVal();
dbms_output.put_line('Emp Name: '||l_value);
l_index := l_index + 1;
END LOOP;

-- Method 4
dbms_output.new_line;
dbms_output.put_line('Method 4');
l_value := l_xmltype.extract('/a:employees/a:emp[2]/a:name/text()', l_a_ns).getStringVal();
dbms_output.put_line('Emp Name: '||l_value);

-- Method 5
dbms_output.new_line;
dbms_output.put_line('Method 5');
l_index := 1;
WHILE l_xmltype.Existsnode('/a:employees/a:emp[' || To_Char(l_index) || ']', l_a_ns) > 0
LOOP
l_empx := l_xmltype.extract('/a:employees/a:emp[' || To_Char(l_index) || ']', l_a_ns);
l_col_ind := 1;
WHILE l_empx.Existsnode('/a:emp/b:favorites/b:color[' || To_Char(l_col_ind) || ']', l_both_ns) > 0
LOOP
l_value := l_empx.extract('/a:emp/b:favorites/b:color[' || To_Char(l_col_ind) || ']/text()', l_both_ns).getStringVal();
dbms_output.put_line('Color: '||l_value);
l_col_ind := l_col_ind + 1;
END LOOP;
l_index := l_index + 1;
END LOOP;
END;
Running this script produces
Method 1
Emp Name: Scott
Emp Name: John

Method 2
Emp Name: Scott

Method 3
Emp Name: Scott
Emp Name: John

Method 4
Emp Name: John

Method 5
Color: red
Color: orange
Color: blue
Color: green
which is exactly the same output as the previous example. This is what we expected. The only call that did not have to change was the one for Method 2. DBMS_XMLDOM is pretty much ignores namespaces when performing operations so this does have useful situations. As Methods 1, 3, and 4 show, the namespace parm only needs to contain the URI information for namespaces referenced in the Xpath, not in the XML. All the namespaces information can be passed along, but it is not needed. Pretty simple, excluding the whole "wait, what namespace is that node in again?" issue.

1 comment: