xml_write
)
and the other writes just the content of the element (xml_writecontent).
October 13, 2001: So I've just realized where I cut a corner, and it's bitten me painfully.
I'm simply writing the content of strings without thinking -- but I need to escape any ampersands
and angle brackets. Otherwise I can't parse what I write, and that's pretty boneheaded. And the
worst of it is, I remember thinking, back a couple years ago, "I need to come back and fix
this later." What an idiot I was. Hear me, past me? You're an idiot! So here's a little
helper function to escape strings.
(October 14, 2001): And now the UTF-8 thing has bitten me. Expat uses UTF-8 internally, meaning
that all data returned during the parse is UTF-8 encoded. I never really realized that. Anyway,
when escaping data which is greater than 127, we are really working with multi-byte characters. See
the default case in the switch below to see what I mean.
(April 13, 2002): Finally I'm to the point where this is getting tested (as the spam trap at
Despammed.com) -- and of course it is completely broken. My
reference this time around is here. The
upshot of UTF-8 is that values over 0x7F are broken into pieces and spread over multiple bytes.
The first byte contains an indication of how long the whole thing is, and subsequent bytes fill in
the rest of the data. Subsequent bytes are all of the form 10xxxxxx, thus giving six bits apiece
to the end result. The first byte supplies varying amounts depending on the total length. The
whole thing all in all is capable of representing the entire four-byte UCS character space, which
of course all the languages on Earth together don't fill up. It's a neat system. Read up on it.
(March 2, 2004): Something I've been meaning to do for a while: I'm rewriting all this
to work with a general writer function, much like xml_parse_general uses a general reader function.
This allows me to write directly to stream-like things (like Oracle LOBs) without first building
a string in memory, and it'll also help us get independent of runtime dependencies under Windows,
which have been an irritant for a long time.
/*
 * Decode the UTF-8 sequence that starts at p.
 *
 * On success stores the decoded value in *value and returns the number of
 * bytes in the sequence (2 to 6; the historical 5- and 6-byte forms are
 * still accepted).  If p[0] is not a lead byte, or any following byte is not
 * a continuation byte of the form 10xxxxxx, stores 0x20 (a space) in *value
 * and returns 1 so that the caller skips only the offending byte.
 *
 * Because the string's terminating NUL is not a continuation byte, a
 * truncated sequence stops the scan before it can read past the end of the
 * string.
 */
static int _xml_utf8_decode (const char * p, long * value)
{
   unsigned char lead = (unsigned char) p[0];
   long v;
   int len;
   int i;

   if      ((lead & 0xE0) == 0xC0) { len = 2; v = lead & 0x1F; } /* 110x xxxx */
   else if ((lead & 0xF0) == 0xE0) { len = 3; v = lead & 0x0F; } /* 1110 xxxx */
   else if ((lead & 0xF8) == 0xF0) { len = 4; v = lead & 0x07; } /* 1111 0xxx */
   else if ((lead & 0xFC) == 0xF8) { len = 5; v = lead & 0x03; } /* 1111 10xx */
   else if ((lead & 0xFE) == 0xFC) { len = 6; v = lead & 0x01; } /* 1111 110x */
   else {
      /* Illegal value, but instead of freaking out we do something quasi-normal. */
      *value = 0x20;
      return 1;
   }

   for (i = 1; i < len; i++) {
      unsigned char c = (unsigned char) p[i];
      if ((c & 0xC0) != 0x80) {
         /* Truncated or malformed sequence: consume the lead byte only. */
         *value = 0x20;
         return 1;
      }
      v = v * 64 + (c & 0x3F);   /* each continuation byte supplies six bits */
   }

   *value = v;
   return len;
}

/*
 * Write str through the writer callback with markup characters escaped.
 *
 *   data   opaque handle passed through as the last argument of write
 *   write  fwrite-style callback: (buffer, 1, byte count, data)
 *   str    NUL-terminated text; NULL is treated as an empty string
 *
 * Runs of ordinary characters are written verbatim.  '&', '<', '>' and '"'
 * become the predefined entities; any other character reported by
 * _xml_danger_char is decoded as UTF-8 and written as a decimal character
 * reference ("&#NNN;").
 *
 * NOTE(review): _xml_danger_char is defined elsewhere; this code assumes it
 * returns a pointer to the next character needing escape, or NULL when none
 * remains -- confirm against its definition.
 */
void _xml_fwrite_escaped (void * data, XMLAPI_DATAWRITE write, char * str)
{
   int len;
   long value;
   char * mark;
   char numbuf[20];

   if (str == NULL) return;

   while ((mark = _xml_danger_char (str)) != NULL) {
      /* Flush the safe text that precedes the character to be escaped. */
      if (mark != str) (*write) (str, 1, mark - str, data);

      len = 1;
      switch (*mark) {
         case '&': (*write) ("&amp;",  1, 5, data); break;
         case '<': (*write) ("&lt;",   1, 4, data); break;
         case '>': (*write) ("&gt;",   1, 4, data); break;
         case '"': (*write) ("&quot;", 1, 6, data); break;
         default:
            len = _xml_utf8_decode (mark, &value);
            /* value is at most 31 bits, so "&#" + 10 digits + ";" + NUL
               always fits in numbuf. */
            sprintf (numbuf, "&#%ld;", value);
            (*write) (numbuf, 1, strlen (numbuf), data);
            break;
      }
      str = mark + len;
   }

   /* Whatever is left contains nothing that needs escaping. */
   len = strlen (str);
   if (len) (*write) (str, 1, len, data);
}

/*
 * Write str through the writer callback exactly as it is, with no escaping.
 * Nothing is written for a NULL or empty string.
 */
void _xml_fwrite (void * data, XMLAPI_DATAWRITE write, char * str)
{
   int len;

   if (str == NULL) return;

   len = strlen (str);
   if (len) (*write) (str, 1, len, data);
}
/*
 * Serialize xml, including its whole subtree, to a stdio stream.
 * Thin wrapper that hands fwrite to xml_write_general as the writer.
 */
XMLAPI void xml_write (FILE * file, XML * xml)
{
   xml_write_general ((void *) file, (XMLAPI_DATAWRITE) fwrite, xml);
}

/*
 * Serialize xml, including its whole subtree, through a writer callback.
 *
 *   data   opaque handle passed through to write
 *   write  fwrite-style callback that receives the output
 *   xml    node to write; NULL writes nothing
 *
 * A node whose name is NULL is written as escaped text taken from the value
 * of its first attribute.  A named node is written as a start tag with its
 * attributes (values escaped), then either "/>" when it has no children or
 * ">", the children, and a matching end tag.
 */
XMLAPI void xml_write_general (void * data, XMLAPI_DATAWRITE write, XML * xml)
{
   ATTR * cur;

   if (xml == NULL) return;

   /* Nameless node: plain text content. */
   if (xml->name == NULL) {
      if (xml->attrs != NULL && xml->attrs->value != NULL) {
         _xml_fwrite_escaped (data, write, xml->attrs->value);
      }
      return;
   }

   /* Start tag and attribute list. */
   _xml_fwrite (data, write, "<");
   _xml_fwrite (data, write, xml->name);
   for (cur = xml->attrs; cur != NULL; cur = cur->next) {
      _xml_fwrite (data, write, " ");
      _xml_fwrite (data, write, cur->name);
      _xml_fwrite (data, write, "=\"");
      _xml_fwrite_escaped (data, write, cur->value);
      _xml_fwrite (data, write, "\"");
   }

   /* Childless elements use the self-closing form. */
   if (xml->children == NULL) {
      _xml_fwrite (data, write, "/>");
      return;
   }
   _xml_fwrite (data, write, ">");

   xml_writecontent_general (data, write, xml);

   /* End tag. */
   _xml_fwrite (data, write, "</");
   _xml_fwrite (data, write, xml->name);
   _xml_fwrite (data, write, ">");
}
/*
 * Write only the children of xml (not its own tags) to a stdio stream.
 * Thin wrapper that hands fwrite to xml_writecontent_general as the writer.
 */
XMLAPI void xml_writecontent (FILE * file, XML * xml)
{
   xml_writecontent_general ((void *) file, (XMLAPI_DATAWRITE) fwrite, xml);
}

/*
 * Write only the children of xml through a writer callback, each one
 * serialized with xml_write_general in list order.  A NULL xml writes
 * nothing.
 */
XMLAPI void xml_writecontent_general (void * data, XMLAPI_DATAWRITE write, XML * xml)
{
   ELEMENTLIST * child;

   if (xml == NULL) return;

   for (child = xml->children; child != NULL; child = child->next) {
      xml_write_general (data, write, child->element);
   }
}
/*
 * Serialize xml, including its whole subtree, as HTML to a stdio stream.
 * Thin wrapper that hands fwrite to xml_writehtml_general as the writer.
 */
XMLAPI void xml_writehtml (FILE * file, XML * xml)
{
   xml_writehtml_general ((void *) file, (XMLAPI_DATAWRITE) fwrite, xml);
}

/*
 * Serialize xml, including its whole subtree, as HTML through a writer
 * callback.
 *
 *   data   opaque handle passed through to write
 *   write  fwrite-style callback that receives the output
 *   xml    node to write; NULL writes nothing
 *
 * Differences from xml_write_general:
 *   - text nodes (NULL name) are written without escaping;
 *   - an element for which xml_is (xml, "nbsp") holds is written as the
 *     entity reference "&nbsp;" instead of a tag;
 *   - childless elements are written as a bare start tag, except "p", "a"
 *     and "textarea", which also get an end tag;
 *   - "li" and "opt" elements with children are written without an end tag.
 *
 * NOTE(review): xml_is is defined elsewhere; presumably it tests the
 * element's name -- confirm.
 */
XMLAPI void xml_writehtml_general (void * data, XMLAPI_DATAWRITE write, XML * xml)
{
   ATTR * attr;

   if (!xml) return;

   /* Nameless node: text, passed through as-is. */
   if (xml->name == NULL) {
      if (xml->attrs != NULL && xml->attrs->value != NULL) {
         _xml_fwrite (data, write, xml->attrs->value);
      }
      return;
   }

   /* Pseudo-element standing in for a non-breaking space. */
   if (xml_is (xml, "nbsp")) {
      _xml_fwrite (data, write, "&nbsp;");
      return;
   }

   /* Start tag and attribute list. */
   _xml_fwrite (data, write, "<");
   _xml_fwrite (data, write, xml->name);
   for (attr = xml->attrs; attr != NULL; attr = attr->next) {
      _xml_fwrite (data, write, " ");
      _xml_fwrite (data, write, attr->name);
      _xml_fwrite (data, write, "=\"");
      _xml_fwrite_escaped (data, write, attr->value);
      _xml_fwrite (data, write, "\"");
   }
   _xml_fwrite (data, write, ">");

   if (xml->children == NULL) {
      /* Only these elements get an end tag when they are empty. */
      if (!strcmp (xml->name, "p") ||
          !strcmp (xml->name, "a") ||
          !strcmp (xml->name, "textarea")) {
         _xml_fwrite (data, write, "</");
         _xml_fwrite (data, write, xml->name);
         _xml_fwrite (data, write, ">");
      }
      return;
   }

   xml_writecontenthtml_general (data, write, xml);

   /* "li" and "opt" are left unclosed; everything else gets an end tag. */
   if (strcmp (xml->name, "li") != 0 && strcmp (xml->name, "opt") != 0) {
      _xml_fwrite (data, write, "</");
      _xml_fwrite (data, write, xml->name);
      _xml_fwrite (data, write, ">");
   }
}
/*
 * Write only the children of xml (not its own tags) as HTML to a stdio
 * stream.  Thin wrapper that hands fwrite to xml_writecontenthtml_general
 * as the writer.
 */
XMLAPI void xml_writecontenthtml (FILE * file, XML * xml)
{
   xml_writecontenthtml_general ((void *) file, (XMLAPI_DATAWRITE) fwrite, xml);
}

/*
 * Write only the children of xml through a writer callback, each one
 * serialized with xml_writehtml_general in list order.  A NULL xml writes
 * nothing.
 */
XMLAPI void xml_writecontenthtml_general (void * data, XMLAPI_DATAWRITE write, XML * xml)
{
   ELEMENTLIST * child;

   if (xml == NULL) return;

   for (child = xml->children; child != NULL; child = child->next) {
      xml_writehtml_general (data, write, child->element);
   }
}
/*
 * Write xml to the file named f, replacing any existing content.
 *
 *   f     path of the file to create or truncate
 *   xml   tree to write
 *   mode  1: children only, as XML    (xml_writecontent)
 *         2: whole element, as HTML   (xml_writehtml)
 *         3: children only, as HTML   (xml_writecontenthtml)
 *         any other value: whole element, as XML (xml_write)
 *
 * Returns 1 on success and 0 on failure.  Failure covers a NULL path, a file
 * that cannot be opened, and any write or close error on the stream.
 * Note that this is the opposite of the convention used by xml_save.
 */
XMLAPI int xml_output (char * f, XML * xml, int mode)
{
   FILE * file;
   int ok;

   if (f == NULL) return 0;

   file = fopen (f, "w");
   if (!file) return 0;

   switch (mode) {
      case 1:  xml_writecontent (file, xml);     break;
      case 2:  xml_writehtml (file, xml);        break;
      case 3:  xml_writecontenthtml (file, xml); break;
      default: xml_write (file, xml);            break;
   }

   /* The writers return nothing, so the stream's error flag is the only
      record of a failed write; fclose can also fail while flushing. */
   ok = !ferror (file);
   if (fclose (file) != 0) ok = 0;
   return ok;
}
/*
 * Read an XML tree from a file whose name is built printf-style.
 *
 *   spec, ...  format and arguments passed to xml_string_formatv to build
 *              the file name
 *
 * Returns whatever xml_read_error produces for the opened file, or NULL when
 * the file name cannot be built or the file cannot be opened.
 *
 * NOTE(review): assumes xml_string_formatv returns a heap string (it is
 * released with free here) and may return NULL on failure -- confirm.
 */
XMLAPI XML * xml_load (const char * spec, ...)
{
   va_list args;
   char * filename;
   FILE * file;
   XML * ret;

   va_start (args, spec);
   filename = xml_string_formatv (spec, args);
   va_end (args);
   if (filename == NULL) return NULL;

   file = fopen (filename, "r");
   free (filename);
   if (!file) return NULL;

   ret = xml_read_error (file);
   fclose (file);
   return ret;
}

/*
 * Write xml, followed by a newline, to a file whose name is built
 * printf-style; any existing content is replaced.
 *
 *   xml        tree to write (serialized with xml_write)
 *   spec, ...  format and arguments passed to xml_string_formatv to build
 *              the file name
 *
 * Returns 0 on success and 1 on failure.  Failure covers a file name that
 * cannot be built, a file that cannot be opened, and any write or close
 * error on the stream.  Note that this is the opposite of the convention
 * used by xml_output.
 */
XMLAPI int xml_save (XML * xml, const char * spec, ...)
{
   va_list args;
   char * filename;
   FILE * file;
   int failed;

   va_start (args, spec);
   filename = xml_string_formatv (spec, args);
   va_end (args);
   if (filename == NULL) return 1;

   file = fopen (filename, "w");
   free (filename);
   if (!file) return 1;

   xml_write (file, xml);
   fprintf (file, "\n");

   /* xml_write returns nothing, so the stream's error flag is the only
      record of a failed write; fclose can also fail while flushing. */
   failed = ferror (file) != 0;
   if (fclose (file) != 0) failed = 1;
   return failed;
}
This code and documentation are released under the terms of the GNU license. They are copyright (c) 2000-2003, Vivtek. All rights reserved except those explicitly granted under the terms of the GNU license. This presentation was created using LPML. |