See Handling elements: startElement See Handling elements: endElement See Handling non-element data: charData XMLAPI XML * xml_read (FILE * file) { XML_Parser parser; char buf[BUFSIZ]; int done; XML * ret; size_t len; ret = NULL; parser = XML_ParserCreate(NULL); XML_SetUserData (parser, (void *) &ret); XML_SetElementHandler(parser, startElement, endElement); XML_SetCharacterDataHandler(parser, charData); done = 0; do { len = fread(buf, 1, sizeof(buf), file); done = len < sizeof(buf); if (!XML_Parse(parser, buf, len, done)) { xml_free (ret); XML_ParserFree(parser); return NULL; } } while (!done); XML_ParserFree(parser); return (ret); } |
xml_read
function was my first expat application, so I wasn't too sure how to
handle error cases. Originally it wrote an error message to stderr
, but that's worse than useless
in most situations. So now the original xml_read
function simply throws up its hands and returns a
NULL pointer, but if you need error feedback, use xml_read_error
, which in case of error returns something
of the form:
<xml-error message="This is the message" code="400" line="19"/>
/*
 * xml_read_error: parse an XML document from an open stdio stream,
 * reporting failures as an element instead of a bare NULL.
 *
 * file  - stream to read from, consumed in BUFSIZ-sized chunks.
 *
 * Returns the element left in the parser's user data on success.  On
 * failure returns a newly created "xml-error" element carrying the
 * attributes "code", "message" and "line".
 */
XMLAPI XML * xml_read_error (FILE * file)
{
   XML_Parser parser;
   char buf[BUFSIZ];
   int done;
   XML * ret;
   size_t len;

   ret = NULL;
   parser = XML_ParserCreate(NULL);
   if (parser == NULL) {
      /* No parser means no parser-derived diagnostics; report the
         allocation failure in the same xml-error shape. */
      ret = xml_create ("xml-error");
      xml_setnum (ret, "code", XML_ERROR_NO_MEMORY);
      xml_set (ret, "message", XML_ErrorString(XML_ERROR_NO_MEMORY));
      xml_setnum (ret, "line", 0);
      return (ret);
   }

   XML_SetUserData (parser, (void *) &ret);
   XML_SetElementHandler(parser, startElement, endElement);
   XML_SetCharacterDataHandler(parser, charData);

   do {
      len = fread(buf, 1, sizeof(buf), file);
      done = len < sizeof(buf);       /* a short read is the last chunk */
      if (!XML_Parse(parser, buf, (int) len, done)) {
         /* ret tracks the currently open element, not the root: climb to
            the top before freeing so the partial tree is released whole. */
         if (ret != NULL) {
            while (ret->parent != NULL) ret = ret->parent;
            xml_free (ret);
         }
         ret = xml_create ("xml-error");
         xml_setnum (ret, "code", XML_GetErrorCode(parser));
         xml_set (ret, "message", XML_ErrorString(XML_GetErrorCode(parser)));
         xml_setnum (ret, "line", XML_GetCurrentLineNumber(parser));
         done = 1;
      }
   } while (!done);

   XML_ParserFree(parser);
   return (ret);
}
xml_parse
, which reads an XML structure from a string
instead of from a file stream. It does the same error handling as xml_read_error
, because that's really probably the best
way to handle errors. I suppose if you might or might not be reading actual string encodings of xml-error
elements,
you might find this inconvenient. Try looking at the string before you parse it, I suppose.
/*
 * xml_parse: parse an XML document held in a NUL-terminated string.
 *
 * buf  - the document text; a NULL pointer is treated as an empty string.
 *
 * Returns the element left in the parser's user data on success.  On
 * failure returns a newly created "xml-error" element carrying the
 * attributes "code", "message" and "line".
 */
XMLAPI XML * xml_parse (const char * buf)
{
   XML_Parser parser;
   XML * ret;
   size_t len;

   ret = NULL;
   if (buf == NULL) buf = "";         /* avoid strlen(NULL) */

   parser = XML_ParserCreate(NULL);
   if (parser == NULL) {
      ret = xml_create ("xml-error");
      xml_setnum (ret, "code", XML_ERROR_NO_MEMORY);
      xml_set (ret, "message", XML_ErrorString(XML_ERROR_NO_MEMORY));
      xml_setnum (ret, "line", 0);
      return (ret);
   }

   XML_SetUserData (parser, (void *) &ret);
   XML_SetElementHandler(parser, startElement, endElement);
   XML_SetCharacterDataHandler(parser, charData);

   len = strlen (buf);
   /* The whole document is handed over in one call, so it is also the
      final call: isFinal must be non-zero or the parser never learns the
      input has ended and cannot report a truncated document.
      NOTE(review): XML_Parse takes an int length; strings longer than
      INT_MAX are not handled here. */
   if (!XML_Parse(parser, buf, (int) len, 1)) {
      /* ret tracks the currently open element, not the root: climb to
         the top before freeing so the partial tree is released whole. */
      if (ret != NULL) {
         while (ret->parent != NULL) ret = ret->parent;
         xml_free (ret);
      }
      ret = xml_create ("xml-error");
      xml_setnum (ret, "code", XML_GetErrorCode(parser));
      xml_set (ret, "message", XML_ErrorString(XML_GetErrorCode(parser)));
      xml_setnum (ret, "line", XML_GetCurrentLineNumber(parser));
   }

   XML_ParserFree(parser);
   return (ret);
}
fread
does, except that the function takes a void * as its data.
/*
 * xml_parse_general: parse an XML document delivered chunk by chunk
 * through a caller-supplied, fread-like callback.
 *
 * data     - opaque pointer passed through to get_buf unchanged.
 * get_buf  - called as get_buf(buf, 1, sizeof buf, data); it fills buf and
 *            returns the number of bytes supplied.  A return of 0 ends the
 *            input.  If the data contains a NUL byte before the returned
 *            length, only the bytes before the NUL are parsed.
 *
 * Returns the element left in the parser's user data on success.  On
 * failure returns a newly created "xml-error" element carrying "code",
 * "message" and "line"; if no element was produced at all, the message is
 * "no data received".
 */
XMLAPI XML * xml_parse_general (void * data, size_t (*get_buf) (char * buf, size_t chunk, size_t num, void *data))
{
   XML_Parser parser;
   char buf[BUFSIZ];
   char * nul;
   int done;
   int failed;
   int received;
   XML * ret;
   size_t len;

   ret = NULL;
   parser = XML_ParserCreate(NULL);
   if (parser == NULL) {
      ret = xml_create ("xml-error");
      xml_setnum (ret, "code", XML_ERROR_NO_MEMORY);
      xml_set (ret, "message", XML_ErrorString(XML_ERROR_NO_MEMORY));
      xml_setnum (ret, "line", 0);
      return (ret);
   }

   XML_SetUserData (parser, (void *) &ret);
   XML_SetElementHandler(parser, startElement, endElement);
   XML_SetCharacterDataHandler(parser, charData);

   failed = 0;
   received = 0;
   do {
      len = (*get_buf)(buf, 1, sizeof(buf), data);
      done = len < sizeof(buf)-1; /* Worst that can happen: we call get_buf for an empty buffer. */

      /* Never trust the callback beyond the buffer, and look for a NUL only
         inside the bytes it claims to have written: the buffer is not
         guaranteed to be NUL-terminated, so strlen() could run off its end. */
      if (len > sizeof(buf)) len = sizeof(buf);
      nul = memchr (buf, '\0', len);
      if (nul != NULL) len = (size_t) (nul - buf);

      if (len == 0) {
         /* End of input.  If earlier chunks were parsed, tell the parser
            the document is complete so truncated input is reported. */
         if (received && !XML_Parse(parser, buf, 0, 1)) failed = 1;
         break;
      }
      received = 1;

      if (!XML_Parse(parser, buf, (int) len, done)) {
         failed = 1;
         done = 1;
      }
   } while (!done);

   if (failed) {
      /* ret tracks the currently open element, not the root: climb to
         the top before freeing so the partial tree is released whole. */
      if (ret != NULL) {
         while (ret->parent != NULL) ret = ret->parent;
         xml_free (ret);
      }
      ret = xml_create ("xml-error");
      xml_setnum (ret, "code", XML_GetErrorCode(parser));
      xml_set (ret, "message", XML_ErrorString(XML_GetErrorCode(parser)));
      xml_setnum (ret, "line", XML_GetCurrentLineNumber(parser));
   }

   XML_ParserFree(parser);

   if (!ret) {
      ret = xml_create ("xml-error");
      xml_setnum (ret, "code", 3);
      xml_set (ret, "message", "no data received");
      xml_setnum (ret, "line", 0);
   }
   return (ret);
}
startElement
handler, then, does a great deal of the work of creating
XML data structures. The userData
parameter points to the immediate parent
of the node being encountered. When we open a new node, we allocate the data structure
and copy attributes, append the new node to its parent, then we set userData to point to the
new node -- when the element closes, we move userData up the chain back to the parent.
In the case of an empty element, expat fortunately calls first the open handler, then the
close handler, so whether we have an explicitly empty element or not doesn't matter.
It's astounding how much simpler this startElement
is than the corresponding
handler in xmltools!
An interesting note: originally I had the call to xml_set
below incrementing
the atts
pointer twice, like **atts++, *atts++
. This worked fine on
Solaris with gcc, but oddly, when I took it to Windows with MSVC, it appeared not to increment
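until after the call. Must be a slightly overzealous "optimization"... (More precisely, C does not define the order in which function arguments are evaluated, so incrementing atts twice within one argument list is undefined behavior.) At any rate, the new code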
works fine.
/*
 * startElement: expat start-tag handler.
 *
 * userData - address of the XML pointer that tracks the element currently
 *            being built (NULL before the first element is seen).
 * name     - tag name of the element being opened.
 * atts     - NULL-terminated array of alternating attribute names and values.
 *
 * Creates a node for the tag, copies its attributes, hooks it under the
 * current node when there is one, and makes the new node current.
 */
void startElement(void *userData, const char *name, const char **atts)
{
   XML * node = xml_create (name);
   XML ** cursor;
   const char ** pair;

   /* Attributes arrive as name/value pairs; step two entries at a time. */
   for (pair = atts; pair[0]; pair += 2)
      xml_set (node, pair[0], pair[1]);

   cursor = (XML **) userData;
   if (*cursor)
      xml_append (*cursor, node);

   *cursor = node;
}
void endElement(void *userData, const char *name) { XML ** element; element = (XML **) userData; if ((*element)->parent != NULL) *element = (*element)->parent; } |
void charData (void *userData, const XML_Char *s, int len) { XML ** parent; parent = (XML **) userData; xml_append (*parent, xml_createtextlen ((char *) s, len)); } |
This code and documentation are released under the terms of the GNU license. They are additionally copyright (c) 2000, Vivtek. All rights reserved except those explicitly granted under the terms of the GNU license. This presentation was created using LPML. |