xmlobj: xmlobj library

xmlobj library

[ Previous: xmlobj include file ] [ Top: xmlobj library ] [ Next: XML object storage ]

#include <stdio.h>
#include <string.h>
#include "xmlobj.h"

First and foremost, what we do with fields is get their values. Field value retrieval is pretty straightforward; the caveat is that the value is a malloc'd string which the caller must free. If the given field is found, then get first attempts to return a "value" attribute. If that fails, then the content of the field is converted into an HTML-formatted string (thus no escaping of UTF-8 characters) and returned.

If the given field isn't found, an effort is made to find a default value in the class definition.

If the field really can't be found, then a NULL pointer is returned -- not a blank string, which is what xml_attrval returns for a missing attribute. This is rather important to note.

May 4, 2002: The first thing we do with fields, though, is to find them. This was using the xml_search function, but now we need some more flexibility. And it occurs to me that xml_search isn't what we should have been using anyway, as it would recurse into structure, which is clearly wrong. So instead, we have xmlobj_field (which creates the field if it can't find it) and xmlobj_is_field (which just tries to find the field and returns NULL if it can't.)

June 1, 2003: Added xmlobj_unset to the pantheon.

June 18, 2003: OK, now it's getting more complicated (as usual). I'm adding complex data structures to the mix, and so in addition to simple field names, xmlobj will also support subrecords with dotted naming, and lists of values or records using (parens) for numerical position in the list or [square brackets] for keyed access on the "id" attribute. This syntax is very similar to the xml_loc locator syntax, except that names don't only apply to element names but also to field names.

A list can either be stored in a separate element, like this:

<link id="person">
  <rec id="John">
     <field id="key">John</field>
     <field id="name">Smith</field>
  </rec>
  <rec id="Sandy">
     <field id="key">Sandy</field>
     <field id="name">Jones</field>
  </rec>
</link>

or it can be stored flat in the record, like this:

<person>
   <field id="key">John</field>
   <field id="name">Smith</field>
</person>
<person>
   <field id="key">Sandy</field>
   <field id="name">Jones</field>
</person>

Only the first allows keyed addressing, but in both instances, the field name "person(1).name" will return a value of "Jones". See?

The way we do this is by getting fancy with xmlobj_field and xmlobj_is_field. Dotted names are easy -- we just call ourselves recursively to find the parent, and then use that as a context for a local name. Finding list elements is another question -- since they can either be on the top level or one down in subrecords, we have to be rather careful. Take a look.

One complexity due to my having attacked this sort of thing more than once: a link field (i.e. a list of subrecords) can be a field element with an id, or a like-named element, or a "link" element with an id attribute. Thus if we're looking for addr(1), we have to look for "addr" elements, "link" elements with id=addr, and "field" elements with id=addr. Similarly, subrecords within link lists can either be "link-to" elements or "rec" elements. Sorry it's so messy, but otherwise I'd have to clean up some other projects and would surely screw something up. It's easier (for me) to make you work harder.

/* _field_match: does this element represent the field called fieldname?
   An element qualifies if it is named fieldname outright, or if it is a
   "field" or "link" element whose "id" attribute equals fieldname.
   The key argument is accepted for symmetry with _subrec_match but is not consulted.
   Returns 1 on a (provisional) match, 0 otherwise. */
static int _field_match (XML * field, char * fieldname, char * key) {
   (void) key;

   /* Like-named element. */
   if (xml_is (field, fieldname)) return 1;

   /* Otherwise only generic containers can match, and only by id. */
   if (!xml_is (field, "field") && !xml_is (field, "link")) return 0;

   return strcmp (fieldname, xml_attrval (field, "id")) == 0;
}
/* _subrec_match: does this element qualify as a subrecord of a list field?
   It must be a "rec" element or a direct child of a "link" element; if a key
   is supplied, its "id" attribute must also equal the key.
   The fieldname argument is not consulted.
   Returns 1 on match, 0 otherwise. */
static int _subrec_match (XML * field, char * fieldname, char * key) {
   (void) fieldname;

   if (!xml_is (field, "rec") && !xml_is (xml_parent (field), "link")) return 0;

   /* Positional access: any subrecord will do. */
   if (!key) return 1;

   /* Keyed access: the id has to agree. */
   return strcmp (key, xml_attrval (field, "id")) == 0;
}

/* xmlobj_is_field: find an existing field (or list subrecord) in obj without creating anything.

   fieldname may be:
     - a simple name ("name"),
     - a dotted path ("addr.city"); the part before the last dot is resolved by a
       recursive call and used as the search context for the part after it,
     - a positional list reference ("person(1)"), or
     - a keyed list reference ("person[John]"), matched against "id" attributes.

   Returns the matching node inside the tree (not a copy), or NULL if obj is NULL,
   the parent path can't be resolved, the index is negative, or nothing matches.
   The class argument is only passed along to the recursive call; it is not otherwise used here. */
XMLAPI XML * xmlobj_is_field (XML * obj, XML * class, const char * fieldname)
{
   XML * field;
   XML * subrec;
   int   offset = 0;     /* Number of qualifying matches still to skip before returning one. */
   int   indexing = 0;   /* Nonzero if the local name carried (n) or [key] indexing. */
   int   subrec_hit;     /* Set when at least one subrecord of the current field qualified. */
   char * key = NULL;    /* Points into copy when [key] addressing is used. */
   char * copy = NULL;   /* Writable duplicate of the name; freed on every exit path below. */
   char * localfield;
   char * mark;
   XML * context;

   if (!obj) return (NULL);

   if (strcspn (fieldname, ".") < strlen (fieldname)) {  /* If we have dotted addressing, split on the last dot. */
      copy = xml_strdup (fieldname);
      localfield = strrchr (copy, '.');
      if (localfield) {
         *localfield = '\0';
         localfield++;
         /* Everything before the last dot names the parent; resolve it first. */
         context = xmlobj_is_field (obj, class, copy);
      }

      if (!context) {
         free (copy);
         return NULL;
      }
   } else {
      context = obj;
      localfield = (char *) fieldname;
   }


   /* The local field may contain indexing.  If so, split on the indexing character. */
   if (mark = strchr (localfield, '(')) {
      if (!copy) {
         /* localfield still aliases the caller's const string, so duplicate before writing to it. */
         copy = xml_strdup (localfield);
         localfield = copy;
         mark = strchr (localfield, '(');
      }
      *mark = '\0';
      mark++;
      offset = atoi (mark);
      indexing = 1;
      if (offset < 0) {  /* Negative index is the signal to create a new subrecord. */
         if (copy) free (copy);
         return NULL;
      }
   } else if (mark = strchr (localfield, '[')) {
      if (!copy) {
         copy = xml_strdup (localfield);
         localfield = copy;
         mark = strchr (localfield, '[');
      }
      *mark = '\0';
      key = mark + 1;
      mark = strchr (key, ']');
      if (mark) *mark = '\0';   /* A missing closing bracket is tolerated; the key then runs to the end. */
      indexing = 1;
   }

   /* Walk the direct child elements of the context only; there is no deeper recursion here. */
   for (field = xml_firstelem (context); field; field = xml_nextelem (field)) {
      if (_field_match (field, localfield, key)) {
         /* On match, check subrecs with the same logic, to one level. */
         subrec_hit = 0;
         if (indexing) {
            for (subrec = xml_firstelem (field); subrec; subrec = xml_nextelem (subrec)) {
               if (_subrec_match (subrec, localfield, key)) {
                  if (!offset) {
                     if (copy) free (copy);
                     return (subrec);
                  }
                  offset --;
                  subrec_hit = 1;
               }
            }
         }

         /* No subrecs qualified, so if the main record *does*, do the right thing. */
         /* (A "link" element is only a container, so it never counts as a list entry itself.) */
         if (!subrec_hit && !xml_is (field, "link")) {
            if (!key || !strcmp (key, xml_attrval (field, "id"))) {
               if (!offset) {
                  if (copy) free (copy);
                  return (field);
               }
               offset --;
            }
         }
      }
   }

   if (copy) free (copy);
   return NULL;
}
/* xmlobj_unset: remove the named field from obj if it exists; otherwise do nothing. */
XMLAPI void xmlobj_unset (XML * obj, XML * class, const char * fieldname)
{
   XML * doomed = xmlobj_is_field (obj, class, fieldname);

   if (doomed) {
      xml_delete_pretty (doomed);
   }
}
/* xmlobj_field: find the named field (or list subrecord) in obj, creating it if it isn't there.

   fieldname accepts the same syntax as xmlobj_is_field: simple names, dotted paths,
   "name(n)" positional references and "name[key]" keyed references.  For a dotted path
   the parent is resolved (and created if necessary) by a recursive call.

   When the field has to be created:
     - if the class definition for the field carries an "element" attribute, a like-named
       element is created (with id=key when a key was given);
     - otherwise, an indexed name gets a new "rec" appended to the "link" element with
       id=name (the link itself is created on demand), and a plain name gets a
       "field" element with id=name;
     - if the class definition carries "ver-keep", the new node starts at ver="0".

   Returns the node found or created (the subrecord for indexed names), or NULL if obj is
   NULL or the parent path could not be resolved.  The node belongs to obj's tree. */
XMLAPI XML * xmlobj_field (XML * obj, XML * class, const char * fieldname)
{
   XML * field;
   XML * fdef = NULL;
   char * copy = NULL;      /* Writable duplicate of the name; released before returning. */
   char * localfield;
   XML * context = NULL;
   XML * subrec = NULL;
   int   indexing = 0;
   char * key = NULL;       /* Points into copy when [key] addressing is used. */
   char * mark;

   if (!obj) return (NULL);

   if (strchr (fieldname, '.')) {  /* If we have dotted addressing, split on the last dot. */
      copy = xml_strdup (fieldname);
      localfield = strrchr (copy, '.');
      *localfield = '\0';
      localfield++;
      context = xmlobj_field (obj, class, copy);

      if (!context) {
         free (copy);
         return NULL;
      }
   } else {
      context = obj;
      localfield = (char *) fieldname;
   }

   field = xmlobj_is_field (context, class, localfield);
   if (!field) {
      /* The local field may contain indexing.  If so, split on the indexing character.
         The numeric position itself is irrelevant when creating: a new subrecord is always appended. */
      if ((mark = strchr (localfield, '(')) != NULL) {
         if (!copy) {
            /* localfield still aliases the caller's const string; duplicate before writing. */
            copy = xml_strdup (localfield);
            localfield = copy;
            mark = strchr (localfield, '(');
         }
         *mark = '\0';
         indexing = 1;
      } else if ((mark = strchr (localfield, '[')) != NULL) {
         if (!copy) {
            copy = xml_strdup (localfield);
            localfield = copy;
            mark = strchr (localfield, '[');
         }
         *mark = '\0';
         key = mark + 1;
         mark = strchr (key, ']');
         if (mark) *mark = '\0';
         indexing = 1;
      }

      if (class) fdef = xml_locf (class, ".field[%s]", fieldname); /* TODO: need to find defn for complex values. */
      if (*xml_attrval (fdef, "element")) {
         field = xml_create (localfield);
         if (key) xml_set (field, "id", key);
      } else {
         if (indexing) {
            field = xml_locf (context, ".link[%s]", localfield);
            if (!field) {
               field = xml_create ("link");
               xml_set (field, "id", localfield);
               xml_append_pretty (context, field);
            }
            subrec = xml_create ("rec");
            xml_append_pretty (field, subrec);
            if (key) xml_set (subrec, "id", key);
         } else {
            field = xml_create ("field");
            xml_set (field, "id", localfield);
         }
      }
      if (*xml_attrval (fdef, "ver-keep")) xml_set (subrec ? subrec : field, "ver", "0");
      /* In the link case the link is already attached (or pre-existing); only loose nodes need appending. */
      if (!subrec) xml_append_pretty (context, field);
   }

   /* The scratch copy was previously leaked on this path.  Nothing created above is expected
      to keep pointers into it (xml_create/xml_set are assumed to duplicate their string
      arguments, as the separate *_nodup variants suggest). */
   if (copy) free (copy);
   return subrec ? subrec : field;
}

And now we can get down to the business of getting and setting values. Note that we have extra logic in xmlobj_get to find dotted-pair notation for attributes of fields, corresponding to the logic for treating xml_attrval values as fields in the main record. (Once xmlobj_set is used to set a value, xml_attrval values will be ignored, but it's a convenient way to set up static records by hand and such.)

/* xmlobj_get: fetch the value of a field as a freshly allocated string (caller frees).

   Lookup order:
     1. an actual field located by xmlobj_is_field;
     2. an attribute: for "a.b" the attribute b of field a, otherwise the
        attribute of obj itself with the field's name;
     3. the "default" attribute of the class's definition of the field;
     4. the pseudofield "key", computed by xmlobj_getkey.
   Returns NULL if obj is NULL or none of the above produces a value. */
XMLAPI char * xmlobj_get (XML * obj, XML * class, const char * field)
{
   XML * node;
   XML * fielddef;
   const char * attr;
   char * prefix;
   char * attrname;

   if (!obj) return (NULL);

   node = xmlobj_is_field (obj, class, field);
   if (node) return xmlobj_get_direct (node);

   if (!strrchr (field, '.')) {
      /* Plain name: maybe it's an attribute on the record itself. */
      attr = xml_attrval (obj, field);
      if (*attr) return (xml_strdup (attr));
   } else {
      /* Dotted name: treat the last component as an attribute of the field named before it. */
      prefix = xml_strdup (field);
      attrname = strrchr (prefix, '.');
      *attrname = '\0';
      attrname++;
      node = xmlobj_is_field (obj, class, prefix);
      attr = node ? xml_attrval (node, attrname) : "";
      free (prefix);
      if (*attr) return (xml_strdup (attr));
   }

   /* Fall back on a default from the class definition. */
   fielddef = xml_search (class, "field", "id", field);
   if (fielddef) {
      attr = xml_attrval (fielddef, "default");
      if (*attr) return (xml_strdup (attr));
   }

   /* Special handling for the pseudofield "key" */
   if (strcmp (field, "key") == 0) {
      return xmlobj_getkey (obj, class);
   }

   return (NULL);
}
/* xmlobj_get_direct: read the value of a field node already in hand.
   If the node has a "ver" attribute and xmlobj_ver_direct finds that version,
   the version node is read instead.  A non-empty "value" attribute wins;
   otherwise the node's content is rendered by xml_stringcontenthtml.
   Returns an allocated string, or NULL when field is NULL. */
XMLAPI char * xmlobj_get_direct (XML * field)
{
   const char * version;
   const char * attrvalue;
   XML * versioned;

   if (field == NULL) return NULL;

   version = xml_attrval (field, "ver");
   if (*version) {
      versioned = xmlobj_ver_direct (field, version);
      if (versioned) {
         field = versioned;
      }
   }

   attrvalue = xml_attrval (field, "value");
   if (*attrvalue) {
      return (xml_strdup (attrvalue));
   }
   return xml_stringcontenthtml (field);
}
/* xmlobj_getnum: fetch a field via xmlobj_get and convert it with atoi.
   Returns 0 when the field has no value at all. */
XMLAPI int xmlobj_getnum (XML * obj, XML * class, const char * field)
{
   int number = 0;
   char * text = xmlobj_get (obj, class, field);

   if (text) {
      number = atoi (text);
      free (text);
   }
   return (number);
}

So logically, if we can get values, we probably want to set them as well. Here, though, we start to get a tad more sophisticated (or at least we will at some point) -- the class should tell us whether or not the value in question may be set (it may be readonly, virtual, or simply not in the view defined by the class, assuming the class is a strict one.)

/* xmlobj_set: set the value of a field, creating the field if necessary.
   The value string is copied (see xmlobj_set_nodup for the ownership-transferring variant).
   If the field carries a "ver" attribute, the version number is incremented and the
   value is written to the node xmlobj_ver_direct returns for the new version.
   Returns 0 if obj is NULL, 1 otherwise. */
XMLAPI int xmlobj_set (XML * obj, XML * class, const char * field, const char * value)
{
   XML * mark;
   int newver = 0;

   if (!obj) return 0;
   mark = xmlobj_field (obj, class, field);
   if (*xml_attrval (mark, "ver")) newver = xml_attrvalnum (mark, "ver") + 1;
   if (newver) {
      xml_setnum (mark, "ver", newver);
      mark = xmlobj_ver_direct (mark, xml_attrval (mark, "ver"));
   }
   xmlobj_set_direct (mark, value);
   /* TODO: if (newver) then get and apply retention rule. */

   /* Previously control fell off the end of this int function, leaving the result undefined. */
   return 1;
}
/* xmlobj_set_direct: store a value in a field node already in hand.
   Any "value" attribute is removed so the content becomes authoritative;
   the content is replaced by a text node holding a copy of value
   (an empty string if value is NULL).  Always returns 1. */
XMLAPI int xmlobj_set_direct (XML * field, const char * value)
{
   const char * text = value;

   if (!text) text = "";

   xml_unset (field, "value");
   xml_replacecontent (field, xml_createtext (text));

   return 1;
}
/* xmlobj_set_nodup: like xmlobj_set, but the value buffer is handed to
   xmlobj_set_direct_nodup rather than copied.
   Returns 0 if obj is NULL, 1 otherwise. */
XMLAPI int xmlobj_set_nodup (XML * obj, XML * class, const char * field, char * value)
{
   XML * target;
   int next = 0;

   if (!obj) return 0;

   target = xmlobj_field (obj, class, field);

   /* A versioned field gets its version bumped, and the write goes to the version node. */
   if (*xml_attrval (target, "ver")) {
      next = xml_attrvalnum (target, "ver") + 1;
   }
   if (next != 0) {
      xml_setnum (target, "ver", next);
      target = xmlobj_ver_direct (target, xml_attrval (target, "ver"));
   }

   xmlobj_set_direct_nodup (target, value);
   /* TODO: if (newver) then get and apply retention rule. */

   return 1;
}
/* xmlobj_set_direct_nodup: store a value in a field node already in hand,
   passing the buffer itself to xml_createtext_nodup instead of copying it.
   A NULL value is replaced by a freshly duplicated empty string.
   Any "value" attribute is removed first.  Always returns 1. */
XMLAPI int xmlobj_set_direct_nodup (XML * field, char * value)
{
   char * text = value;

   xml_unset (field, "value");

   if (!text) text = xml_strdup ("");
   xml_replacecontent (field, xml_createtext_nodup (text));

   return 1;
}
/* xmlobj_set_elem: set a value stored as a like-named element rather than a
   generic "field" element.  If no such field exists, an element named after
   the field is appended to obj.  The value is copied (empty string for NULL).
   Returns 0 if obj is NULL, 1 otherwise.
   TODO: this doesn't do complex values */
XMLAPI int xmlobj_set_elem (XML * obj, XML * class, const char * field, const char * value)
{
   XML * elem;

   if (!obj) return 0;

   elem = xmlobj_is_field (obj, class, field);
   if (elem == NULL) {
      elem = xml_create (field);
      xml_append_pretty (obj, elem);
   }

   if (value == NULL) value = "";
   xml_replacecontent (elem, xml_createtext (value));

   return 1;
}
/* xmlobj_set_elem_nodup: like xmlobj_set_elem, but the value buffer is passed to
   xml_createtext_nodup instead of being copied.  A NULL value is replaced
   by a freshly duplicated empty string.
   Returns 0 if obj is NULL, 1 otherwise. */
XMLAPI int xmlobj_set_elem_nodup (XML * obj, XML * class, const char * field, char * value)
{
   XML * elem;

   if (!obj) return 0;

   elem = xmlobj_is_field (obj, class, field);
   if (elem == NULL) {
      elem = xml_create (field);
      xml_append_pretty (obj, elem);
   }

   if (value == NULL) value = xml_strdup("");
   xml_replacecontent (elem, xml_createtext_nodup (value));

   return 1;
}
/* xmlobj_setnum: set a field to the decimal representation of an integer.
   The field is looked up with xml_search as a "field" element with the given id,
   and is created and appended to obj if absent.  The class argument is not consulted.
   Returns 0 if obj is NULL, 1 otherwise. */
XMLAPI int xmlobj_setnum (XML * obj, XML * class, const char * field, int value)
{
   XML * mark;
   /* Three decimal digits per byte is an upper bound on the magnitude;
      the extra two bytes hold the sign and the terminating NUL. */
   char buf[sizeof (int) * 3 + 2];

   if (!obj) return 0;
   mark = xml_search (obj, "field", "id", field);
   if (!mark) {
      mark = xml_create ("field");
      xml_set (mark, "id", field);
      xml_append_pretty (obj, mark);
   }

   sprintf (buf, "%d", value);

   /* Go through xmlobj_set_direct so that a stale "value" attribute is cleared;
      otherwise xmlobj_get_direct would keep returning the old attribute value
      in preference to the new content. */
   return xmlobj_set_direct (mark, buf);
}

The next item on the agenda is formatting of short text strings. Such a string encloses field names in square brackets; these are interpolated with the values of the fields named. The current version is quite straightforward; there are, however, lots of neat extensions you could toss in here.

/* xmlobj_format: interpolate field values into a short format string.
   Each [name] in format is replaced by the value xmlobj_get returns for that
   name (or by nothing if there is no value); all other text is copied as is.
   An unterminated "[name" takes the rest of the string as the name.
   Returns a newly allocated string which the caller must free. */
XMLAPI char * xmlobj_format (XML * obj, XML * class, const char * format)
{
   XML * scratch = xml_create ("s");   /* Attributes "s" and "name" serve as growable string buffers. */
   const char * open;
   const char * close;
   char * value;
   char * result;
   int namelen;

   xml_set (scratch, "s", "");

   for (open = strchr (format, '['); open; open = strchr (format, '[')) {
      /* Literal text up to the opening bracket. */
      xml_attrncat (scratch, "s", format, open - format);
      format = open + 1;

      /* Isolate the field name. */
      xml_set (scratch, "name", "");
      close = strchr (format, ']');
      if (close) namelen = close - format;
      else       namelen = strlen (format);
      xml_attrncat (scratch, "name", format, namelen);

      /* Substitute its value, if it has one. */
      value = xmlobj_get (obj, class, xml_attrval (scratch, "name"));
      if (value) {
         xml_attrcat (scratch, "s", value);
         free (value);
      }

      /* Step over the name and the closing bracket (if present). */
      format += namelen;
      if (*format) format++;
   }

   /* Whatever follows the last substitution. */
   xml_attrcat (scratch, "s", format);

   result = xml_strdup (xml_attrval (scratch, "s"));
   xml_free (scratch);
   return (result);
}

The long text version of this is from the wftk API; I'll deal with it later. The template formatter just serves as a convenient wrapper for xml_template and I'll deal with that later, too.

/* xmlobj_formatlong: placeholder for the long-text formatter; not implemented, always returns NULL. */
XMLAPI       char * xmlobj_formatlong(XML * obj, XML * class, const char * format)
{
   (void) obj;
   (void) class;
   (void) format;
   return NULL;
}
/* xmlobj_template: placeholder for the template formatter; not implemented, always returns NULL. */
XMLAPI       XML *  xmlobj_template  (XML * obj, XML * class, XML * template)
{
   (void) obj;
   (void) class;
   (void) template;
   return NULL;
}

Onwards; getting a key value is dependent on the class, of course, which should have a key definition. (August 30, 2002): Rather arbitrarily, I define keys as having no spaces, no capital letters, and no punctuation. This is mostly to avoid problems when using keys for filenames. Trust me on this, I've shot myself in the foot all too often with invalid filenames. If you really, really want to have keys with spaces, capital letters, or punctuation... well... this is open source for a reason, bub. Get to typing.

The way I enforce this heavy-handed decision is with the simple little function _xmlobj_fixkey. Since we know that our key is going to be in an allocated buffer, we can play around with it to fix up our key.

May 28, 2003: One often wonders just how long it takes to find all the bugs in a given piece of code....

May 28, 2003, second note: Hoist by me own petard. I need '~' as a valid key char for the taskindex.

June 7, 2003: Oops. Fixkey needs to be visible so that I can build keys externally.

/* xmlobj_fixkey: normalize a key in place so that it is safe to use as a filename.
   Capital letters are lowercased, spaces become underscores, and every character
   other than 0-9, a-z, '_' and '~' is deleted.  The string can only get shorter.
   A NULL key is ignored.

   Implemented as a single read/write pass: this avoids both the quadratic
   shift-left on every deleted character and stepping the pointer back before
   the start of the buffer when the first character is deleted. */
XMLAPI void xmlobj_fixkey (char * key)
{
   char * in;
   char * out;
   char c;

   if (!key) return;

   out = key;
   for (in = key; *in; in++) {
      c = *in;
      if (c >= 'A' && c <= 'Z') c += 'a' - 'A';
      else if (c == ' ')        c = '_';

      /* List of good punctuation: '_' and '~'.  There may be more.  Who knows? */
      if ((c >= '0' && c <= '9') || (c >= 'a' && c <= 'z') || c == '_' || c == '~') {
         *out++ = c;
      }
   }
   *out = '\0';
}
/* xmlobj_getkey: compute the key of an object as an allocated, fixkey-normalized string.

   If the class has a non-empty "key" attribute, it is either a format string
   (when it contains '[') interpolated by xmlobj_format, or the name of the
   field holding the key.  In the latter case the result may be NULL if the
   field doesn't exist.

   Without a class key, the first "field" element xml_search finds in obj is
   used: its "value" attribute if non-empty (in which case the result is also
   stored in obj's "key" attribute), otherwise its content.  If there is no
   such element at all, the key is "none". */
XMLAPI char * xmlobj_getkey (XML * obj, XML * class)
{
   const char * keydef = NULL;
   const char * attr;
   XML * first;
   char * result;

   if (class) keydef = xml_attrval (class, "key");

   if (keydef && *keydef) {
      if (strchr (keydef, '[')) {
         result = xmlobj_format (obj, class, keydef);
      } else {
         /* Not xmlobj_get: if key="key" that would loop.... 2003-05-28 */
         result = xmlobj_get_direct (xmlobj_is_field (obj, class, keydef));
      }
      xmlobj_fixkey (result);
      return result;
   }

   first = xml_search (obj, "field", NULL, NULL);
   if (!first) {
      return (xml_strdup ("none"));
   }

   attr = xml_attrval (first, "value");
   if (*attr) {
      result = xml_strdup (attr);
      xmlobj_fixkey (result);
      xml_set (obj, "key", result);
      return (result);
   }

   result = xml_stringcontenthtml (first);
   xmlobj_fixkey (result);
   return (result);
}

(23 Feb 2003): So it suddenly seems useful to have a configuration value reader. This is largely in support of the repository manager, of course, but in this case we do two things. First, we treat the repository definition as an xmlobj object so we can read a value -- but we ignore the class, and we pass in a convenient default value to be used if the field in question isn't defined. Second, when setting a value, if the "object" given has a "filename" attribute, then we use that attribute to read in a source object, set the value in it, and write it back out. We do not write the current object (although we also set the value in that object) because the repository definition ends up heavily modified from its original image on disk, during a typical repmgr run.

So all that being said, I'm not terribly sure how general the following four functions really are, but I'm trying to put them where they can be of most general use, so here they are:

/* xmlobj_getconf: read a configuration value, ignoring any class.
   Returns an allocated string: the field's value if the field exists in obj,
   otherwise a duplicate of deflt (also when obj is NULL). */
XMLAPI char *  xmlobj_getconf    (XML * obj, const char * field, const char * deflt)
{
   XML * node = NULL;

   if (obj) node = xmlobj_is_field (obj, NULL, field);

   if (node) return xmlobj_get_direct (node);

   return (xml_strdup (deflt));
}
/* xmlobj_getconfnum: read a numeric configuration value, ignoring any class.
   Returns the field's value converted with atoi if the field exists in obj;
   otherwise (obj NULL, field missing, or no value obtainable) returns deflt. */
XMLAPI int     xmlobj_getconfnum (XML * obj, const char * field, int deflt)
{
   XML * mark;
   int retval;
   char * val;

   if (!obj) return (deflt);
   mark = xmlobj_is_field (obj, NULL, field);
   if (!mark) return deflt;

   val = xmlobj_get_direct (mark);
   /* Don't hand a NULL pointer to atoi; treat "no value" like "no field". */
   if (!val) return deflt;

   retval = atoi (val);
   free (val);
   return retval;
}
/* xmlobj_setconf: set a configuration value in obj and, if obj has a
   "filename" attribute, also in the file it names.
   The file is read into a separate object, that object is updated and written
   back, and obj itself is never written out.  If the file can't be opened or
   parsed, only the in-memory value is changed; failure is silent. */
XMLAPI void    xmlobj_setconf    (XML * obj, const char * field, const char * value)
{
   FILE * fp;
   XML * ondisk;

   xmlobj_set (obj, NULL, field, value);

   if (!*xml_attrval (obj, "filename")) return;

   /* Load the pristine image from disk. */
   fp = fopen (xml_attrval (obj, "filename"), "r");
   if (!fp) return;
   ondisk = xml_read (fp);
   fclose (fp);
   if (!ondisk) return;

   /* Apply the same change there and write it back. */
   xmlobj_set (ondisk, NULL, field, value);
   fp = fopen (xml_attrval (obj, "filename"), "w");
   if (fp) {
      xml_write (fp, ondisk);
      fclose (fp);
   }
   xml_free (ondisk);
}
/* xmlobj_setconfnum: numeric counterpart of xmlobj_setconf.
   Sets the value in obj and, if obj has a "filename" attribute, in the file
   it names as well (read, update, write back).  obj itself is never written
   out; failure to open or parse the file is silent. */
XMLAPI void    xmlobj_setconfnum (XML * obj, const char * field, int value)
{
   FILE * fp;
   XML * ondisk;

   xmlobj_setnum (obj, NULL, field, value);

   if (!*xml_attrval (obj, "filename")) return;

   /* Load the pristine image from disk. */
   fp = fopen (xml_attrval (obj, "filename"), "r");
   if (!fp) return;
   ondisk = xml_read (fp);
   fclose (fp);
   if (!ondisk) return;

   /* Apply the same change there and write it back. */
   xmlobj_setnum (ondisk, NULL, field, value);
   fp = fopen (xml_attrval (obj, "filename"), "w");
   if (fp) {
      xml_write (fp, ondisk);
      fclose (fp);
   }
   xml_free (ondisk);
}

Next up is the diff/patch functionality. This is only meaningful in the context of these records; it ignores any changes which don't have meaning as fields or links. The advantage is that order of fields/links, whitespace, and so forth are all completely transparent to this diff/patch functionality. The result is that when I go to modify a record I can store precisely the change made, allowing a rudimentary transaction log to be kept (which permits rollbacks, selective approval processes, and so forth. It's good stuff.)

Basically, record diff works like this:

Check for discrepancies in the attributes of the two records.
For fields in the edited record which are new to the original, make a note of attributes and values.
For fields in the original record which are missing in the edited version, do nothing.
For fields present in both records, make a note of changed attributes or values.
For links in the edited record which are new to the original, include the entire link.
For links in the original missing from the edited version, do nothing.
For links present in both records, scan the subrecords in the link and basically do the same with them.
For other XML elements, if they have "id" attributes and they're not present in the original, they'll be noted.

Note that this all means that a field which is missing in the changed record will not appear in the diff. The reason for this is simple: this is intended to support an XML field-based editing system, and partial records shouldn't have the ability to remove fields that they don't know about. If a field needs to be erased, the best you can do is to set its value to a blank -- but you can't remove a field once the record knows about it. Which is, of course, exactly the way we want it.

Links, mentioned in passing above, are not well supported by the rest of the API yet; they can be seen as lists of relationships with outside records. Typically they will simply be lists of keys, but they can include arbitrary other fields as well, to allow labelling of links. This section of the xmlobj library is under considerable flux at the moment.

/* xmlobj_diff: build a "diff" element describing how `changed` differs from `obj`.

   - Attributes of `changed` whose values differ from obj's are set on the diff element.
   - "field" children: a field absent from obj is copied whole; for a field present in
     both, differing attributes and a differing value are recorded in a new "field"
     element with the same id.  If only attributes differ, diff:content="unchanged" is set.
   - "link" children: a link absent from obj is copied with diff:action="add" on each
     child; otherwise the link's records are compared in both directions and
     unmatched records are recorded with diff:action="del" or "add".
   - Any other child element is copied if xml_locf can't find a like-named element
     with the same id in obj.

   Nothing is recorded for fields or links that exist in obj but not in `changed`.
   Neither input is modified.  Returns the newly created diff element
   (presumably to be released by the caller with xml_free -- confirm against callers). */
XMLAPI XML * xmlobj_diff (XML * obj, XML * class, XML * changed)
{
   XML * diff = xml_create ("diff");
   XML * ofield;
   XML * cfield;
   XML * dfield;
   char * linkfield = NULL;   /* Never used below. */
   XML_ATTR * attr = xml_attrfirst (changed);
   char * ovalue = NULL;
   char * cvalue = NULL;
   XML * olinkrec;
   XML * clinkrec;
   XML * dlinkrec;
   XML * olinkfld;
   XML * clinkfld;
   const char * key;

   /* Record-level attributes: note every attribute of the changed record that differs from the original. */
   while (attr) {
      if (strcmp (xml_attrval (obj, xml_attrname (attr)), xml_attrvalue (attr))) {
         xml_set (diff, xml_attrname (attr), xml_attrvalue (attr));
      }
      attr = xml_attrnext (attr);
   }

   cfield = xml_firstelem (changed);
   while (cfield) {
      if (xml_is (cfield, "field")) {
         ofield = xml_locf (obj, ".field[%s]", xml_attrval (cfield, "id"));
         if (!ofield) {
            /* New field: take it as it is. */
            xml_append_pretty (diff, xml_copy (cfield));
         } else {
            /* dfield is created lazily, only once a difference is actually found. */
            dfield = NULL;
            attr = xml_attrfirst (cfield);
            while (attr) {
               if (strcmp (xml_attrval (ofield, xml_attrname (attr)), xml_attrvalue (attr))) {
                  if (!dfield) { dfield = xml_create ("field"); xml_set (dfield, "id", xml_attrval (cfield, "id")); }
                  xml_set (dfield, xml_attrname (attr), xml_attrvalue (attr));
               }
               attr = xml_attrnext (attr);
            }
            ovalue = xmlobj_get (obj, class, xml_attrval (cfield, "id"));
            cvalue = xmlobj_get (changed, class, xml_attrval (cfield, "id"));
            if (!ovalue) {
               if (cvalue) {
                  if (!dfield) { dfield = xml_create ("field"); xml_set (dfield, "id", xml_attrval (cfield, "id")); }
                  /* The text node takes over the cvalue buffer, so it must not be freed below. */
                  xml_append (dfield, xml_createtext_nodup (cvalue));
                  cvalue = NULL;
               } else {
                  if (dfield) xml_set (dfield, "diff:content", "unchanged");
               }
            } else if (cvalue) {
               if (!strcmp (ovalue, cvalue)) {
                  if (dfield) xml_set (dfield, "diff:content", "unchanged");
               } else {
                  if (!dfield) { dfield = xml_create ("field"); xml_set (dfield, "id", xml_attrval (cfield, "id")); }
                  xml_append (dfield, xml_createtext_nodup (cvalue));
                  cvalue = NULL;
               }
            }
            /* (If the original has a value and the changed record has none, no content change is noted.) */
            if (ovalue) free (ovalue);
            if (cvalue) free (cvalue);
            if (dfield) xml_append_pretty (diff, dfield);
         }
      } else if (xml_is (cfield, "link")) {
         /* I need to document how links work, but I'm using them to represent many-to-many linking tables. */
         ofield = xml_locf (obj, ".link[%s]", xml_attrval (cfield, "id"));
         if (!ofield) {
            /* Entirely new link: copy it, marking each record as an addition.
               (ofield is reused here to hold the copy.) */
            ofield = xml_copy (cfield);
            olinkrec = xml_firstelem (ofield);
            while (olinkrec) {
               xml_set (olinkrec, "diff:action", "add");
               olinkrec = xml_nextelem (olinkrec);
            }
            xml_append_pretty (diff, ofield);
            
         } else {
            dfield = NULL;

            /* Find key of link; if a link is named e.g. id="My Table:local_id" then "local_id" is the key field of the link,
               which corresponds to the key of the current record.  If this field is included in the link record, it should
               actually be completely ignored. */
            key = xml_attrval (cfield, "id");
            key = strchr (key, ':');
            if (key) key++;

            /* Scan original for deleted links. */
            /* Two records match when every child of the changed record (other than the key field)
               has the same value in both; an early break leaves clinkfld non-NULL, signalling a mismatch. */
            olinkrec = xml_firstelem (ofield);
            while (olinkrec) {
               clinkrec = xml_firstelem (cfield);
               while (clinkrec) {
                  clinkfld = xml_firstelem (clinkrec);
                  while (clinkfld) {
                     if (key && !strcmp (key, xml_attrval (clinkfld, "id"))) {
                        clinkfld = xml_nextelem (clinkfld);
                        continue;
                     }
                     cvalue = xmlobj_get (clinkrec, NULL, xml_attrval (clinkfld, "id"));
                     ovalue = xmlobj_get (olinkrec, NULL, xml_attrval (clinkfld, "id"));
                     if (!cvalue && !ovalue) {
                        clinkfld = xml_nextelem (clinkfld);
                        continue;
                     }
                     if (!cvalue) {
                        free (ovalue);
                        break;
                     }
                     if (!ovalue) {
                        free (cvalue);
                        break;
                     }
                     if (strcmp (cvalue, ovalue)) {
                        free (ovalue); free (cvalue);
                        break;
                     }
                     free (ovalue); free (cvalue);
                     clinkfld = xml_nextelem (clinkfld);
                  }
                  if (!clinkfld) { /* All fields matched, so the records match. */
                     break;
                  }
                  /* Otherwise, keep looking for a matching field. */  
                  clinkrec = xml_nextelem (clinkrec);
               }
               if (!clinkrec) { /* No link records matched, so this olinkrec was deleted. */
                  if (!dfield) {
                     dfield = xml_create ("link");
                     xml_copyattrs (dfield, cfield);
                  }
                  dlinkrec = xml_create (xml_name (olinkrec));
                  xml_append_pretty (dfield, dlinkrec);
                  xml_set (dlinkrec, "diff:action", "del");
                  olinkfld = xml_firstelem (olinkrec);
                  while (olinkfld) {
                     xml_append_pretty (dlinkrec, xml_copy (olinkfld));
                     olinkfld = xml_nextelem (olinkfld);
                  }
               }
               /* Now check next link record. */
               olinkrec = xml_nextelem (olinkrec);
            }

            /* Scan edited for added links. */
            /* Same comparison as above with the loops swapped: each changed record looks for a match in the original. */
            clinkrec = xml_firstelem (cfield);
            while (clinkrec) {
               olinkrec = xml_firstelem (ofield);
               while (olinkrec) {
                  clinkfld = xml_firstelem (clinkrec);
                  while (clinkfld) {
                     if (key && !strcmp (key, xml_attrval (clinkfld, "id"))) {
                        clinkfld = xml_nextelem (clinkfld);
                        continue;
                     }
                     cvalue = xmlobj_get (clinkrec, NULL, xml_attrval (clinkfld, "id"));
                     ovalue = xmlobj_get (olinkrec, NULL, xml_attrval (clinkfld, "id"));
                     if (!cvalue && !ovalue) {
                        clinkfld = xml_nextelem (clinkfld);
                        continue;
                     }
                     if (!cvalue) {
                        free (ovalue);
                        break;
                     }
                     if (!ovalue) {
                        free (cvalue);
                        break;
                     }
                     if (strcmp (cvalue, ovalue)) {
                        free (ovalue); free (cvalue);
                        break;
                     }
                     free (ovalue); free (cvalue);
                     clinkfld = xml_nextelem (clinkfld);
                  }
                  if (!clinkfld) { /* All fields matched, so the records match. */
                     break;
                  }
                  /* Otherwise, keep looking for a matching field. */  
                  olinkrec = xml_nextelem (olinkrec);
               }
               if (!olinkrec) { /* No link records matched, so this clinkrec was added. */
                  if (!dfield) {
                     dfield = xml_create ("link");
                     xml_copyattrs (dfield, cfield);
                  }
                  dlinkrec = xml_create (xml_name (clinkrec));
                  xml_append_pretty (dfield, dlinkrec);
                  xml_set (dlinkrec, "diff:action", "add");
                  clinkfld = xml_firstelem (clinkrec);
                  while (clinkfld) {
                     xml_append_pretty (dlinkrec, xml_copy (clinkfld));
                     clinkfld = xml_nextelem (clinkfld);
                  }
               }
               /* Now check next link record. */
               clinkrec = xml_nextelem (clinkrec);
            }

            /* Scan original for matches that are possibly changed -- TODO (requires reference to record class) */

            /* Finally, if we've accumulated any changes, write them to the diff. */
            if (dfield) xml_append_pretty (diff, dfield);
         }
      } else {
         /* For arbitrary content, I'm not entirely sure what to do.  One thing is obvious, though; arbitrary content
            which does not yet exist in the original object should be copied (that keeps things consistent with the above. ) */
         ofield = xml_locf (obj, ".%s[%s]", xml_name (cfield), xml_attrval (cfield, "id"));
         if (!ofield) {
            xml_append (diff, xml_copy (cfield));
         } else {
            /* ??? TODO: figure out if there's anything rational we can do here. */
         }
      }

      cfield = xml_nextelem (cfield);
   }
   return diff;
}

Application of a diff (the "patch" operation) is pretty straightforward, then. We just step down through the changes noted, and apply them. The process also results in an "undiff" which we return, so that we can simply and easily back out of changes. Thus xmlobj_patch (orig, xmlobj_patch (orig, diff)) should leave orig substantially the same as it started out, although there may be some whitespace changes.

If we only want to generate the undiff record without changing the original, we use "undiff".

/* Shared worker behind xmlobj_patch and xmlobj_undiff; change_orig selects whether orig is modified. */
static XML * _xmlobj_patch (XML * orig, XML * diff, int change_orig);

/* xmlobj_patch: applies diff to obj (modifying obj) and returns a newly created "diff" element
   that records the values obj had before the change.
   The class argument is accepted but not consulted here.
   NOTE(review): the returned tree comes from xml_create; presumably the caller releases it. */
XMLAPI XML * xmlobj_patch (XML * obj, XML * class, XML * diff)
{
   XML * undiff;

   (void) class;
   undiff = _xmlobj_patch (obj, diff, 1);
   return undiff;
}
/* xmlobj_undiff: builds the same undo record that patching obj with diff would return,
   but asks the worker not to modify obj (change_orig is 0).
   The class argument is accepted but not consulted here. */
XMLAPI XML * xmlobj_undiff (XML * obj, XML * class, XML * diff)
{
   XML * undiff;

   (void) class;
   undiff = _xmlobj_patch (obj, diff, 0);
   return undiff;
}

And of course both of those call the same function anyway (so they should actually be macros, right?)

October 1, 2002: Stupid oversight -- I wasn't changing diff'd attributes of the root element. Sheesh.

/* Returns nonzero when, for every child element of the diff's link record `mark`, the value
   fetched by that child's "id" is the same in `mark` and in the candidate record `rec`
   (both missing also counts as the same).  A `mark` without child elements matches anything. */
static int _xmlobj_patch_link_matches (XML * rec, XML * mark)
{
   XML * fld;
   char * dvalue;
   char * ovalue;
   int same;

   for (fld = xml_firstelem (mark); fld; fld = xml_nextelem (fld)) {
      dvalue = xmlobj_get (mark, NULL, xml_attrval (fld, "id"));
      ovalue = xmlobj_get (rec, NULL, xml_attrval (fld, "id"));
      if (!dvalue || !ovalue) {
         same = (!dvalue && !ovalue);
      } else {
         same = !strcmp (dvalue, ovalue);
      }
      free ((void *) dvalue);  /* free (NULL) is a no-op */
      free ((void *) ovalue);
      if (!same) return 0;
   }
   return 1;
}

/* Returns the first "link-to" child of olist that matches `mark` (see above), or NULL when
   olist is NULL or holds no such record. */
static XML * _xmlobj_patch_find_link (XML * olist, XML * mark)
{
   XML * rec;

   if (!olist) return NULL;
   for (rec = xml_firstelem (olist); rec; rec = xml_nextelem (rec)) {
      if (xml_is (rec, "link-to") && _xmlobj_patch_link_matches (rec, mark)) return rec;
   }
   return NULL;
}

/* _xmlobj_patch: walks the changes recorded in diff and builds the matching undo record.
     orig        - the object the diff applies to
     diff        - the change description ("field", "link" and arbitrary child elements)
     change_orig - nonzero: apply the changes to orig; zero: leave orig alone and only
                   build the undo record
   Returns a newly created "diff" element holding the undo record.
   This definition has internal linkage because of the earlier static prototype. */
XML * _xmlobj_patch (XML * orig, XML * diff, int change_orig)
{
   XML * undiff = xml_create ("diff");
   XML_ATTR * attr;
   XML * scan;
   XML * mark;
   XML * olist;
   XML * ulist;
   XML * olinkrec;
   XML * undifflink;
   const char * action;
   char * value;

   /* Attributes of the diff's root: remember the original's value, then overwrite it. */
   for (attr = xml_attrfirst (diff); attr; attr = xml_attrnext (attr)) {
      xml_set (undiff, xml_attrname (attr), xml_attrval (orig, xml_attrname (attr)));
      if (change_orig) {
         xml_set (orig, xml_attrname (attr), xml_attrval (diff, xml_attrname (attr)));
      }
   }

   for (scan = xml_firstelem (diff); scan; scan = xml_nextelem (scan)) {
      if (xml_is (scan, "field")) {
         /* Patching fields is easy: the undo record gets the old value (blank if there was none). */
         value = xmlobj_get (orig, NULL, xml_attrval (scan, "id"));
         if (value) {
            xmlobj_set (undiff, NULL, xml_attrval (scan, "id"), value);
            free ((void *) value);
         } else {
            xmlobj_set (undiff, NULL, xml_attrval (scan, "id"), "");
         }

         if (change_orig) {
            /* NOTE(review): if xmlobj_get returns NULL here, NULL is handed on to xmlobj_set
               unchanged -- confirm that xmlobj_set accepts a NULL value. */
            value = xmlobj_get (diff, NULL, xml_attrval (scan, "id"));
            xmlobj_set (orig, NULL, xml_attrval (scan, "id"), value);
            free ((void *) value);
         }
      } else if (xml_is (scan, "link")) {
         /* To patch lists, we first make sure we've got the list in the undiff and orig.
            (orig only gets a new list when we are allowed to change it.) */
         olist = xml_locf (orig, ".link[%s]", xml_attrval (scan, "id"));
         if (change_orig && !olist) {
            olist = xml_create ("link");
            xml_copyattrs (olist, scan);
            xml_append_pretty (orig, olist);
         }
         ulist = xml_create ("link");
         xml_copyattrs (ulist, scan);
         xml_append_pretty (undiff, ulist);

         /* Now we scan the list *change* spec, and undiff/patch as required. */
         for (mark = xml_firstelem (scan); mark; mark = xml_nextelem (mark)) {
            if (!xml_is (mark, "link-to")) continue;

            action = xml_attrval (mark, "diff:action");
            if (!strcmp (action, "add")) {
               /* We make an add only if there is no pre-existing identical link. */
               olinkrec = _xmlobj_patch_find_link (olist, mark);
               if (!olinkrec) {
                  undifflink = xml_copy (mark);
                  xml_set (undifflink, "diff:action", "del");
                  xml_append_pretty (ulist, undifflink);
                  if (change_orig) {
                     /* olist is never NULL here: it was created above when change_orig is set. */
                     undifflink = xml_copy (mark);
                     xml_unset (undifflink, "diff:action");
                     xml_append_pretty (olist, undifflink);
                  }
               }
            } else if (!strcmp (action, "del")) {
               /* A delete only does something if the link really *is* there. */
               olinkrec = _xmlobj_patch_find_link (olist, mark);
               if (olinkrec) {
                  undifflink = xml_copy (olinkrec);
                  xml_set (undifflink, "diff:action", "add");
                  xml_append_pretty (ulist, undifflink);
                  if (change_orig) {
                     xml_delete (olinkrec);
                  }
               }
            }
         }
      } else {
         /* For patching arbitrary content, we're fine -- but we don't have any way of removing it, so
            the undiff simply won't show it.  TODO: think about whether this could make more sense. */
         if (change_orig) {
            mark = xml_search (orig, xml_name (scan), "id", xml_attrval (scan, "id"));
            if (!mark) {
               xml_append_pretty (orig, xml_copy (scan));
            }
         }
      }
   }

   return undiff;
}

(August 13, 2002): It's frightening to look at these dates sometimes and realize how long I've been doing this. Anyway, today I need versioning functionality. I've revamped the versioning API entirely and it's really pretty straightforward at this point.

Note that most of the maintenance of versions takes place in xmlobj_get and xmlobj_set transparently. Only in fairly rare instances will we need to call the versioning API directly.

/* xmlobj_ver: looks up the named field of obj via xmlobj_field and returns its "ver"
   child whose id equals ver; xmlobj_ver_direct creates that child when it is missing. */
XMLAPI XML  * xmlobj_ver        (XML * obj, XML * class, const char * field, const char * ver)
{
   return xmlobj_ver_direct (xmlobj_field (obj, class, field), ver);
}
/* xmlobj_ver_direct: returns the "ver" child element of field whose "id" attribute equals ver.
   When no such child exists, a new "ver" element is appended to field, given that id,
   and returned instead. */
XMLAPI XML  * xmlobj_ver_direct (XML * field, const char * ver)
{
   XML * cur;
   XML * created;

   for (cur = xml_firstelem (field); cur != NULL; cur = xml_nextelem (cur)) {
      if (!xml_is (cur, "ver")) continue;
      if (strcmp (ver, xml_attrval (cur, "id")) == 0) return cur;
   }

   /* Nothing matched: make the version on the spot. */
   created = xml_create ("ver");
   xml_append_pretty (field, created);
   xml_set (created, "id", ver);
   return created;
}
/* xmlobj_getver: returns the value of version ver of the named field, as produced by
   xmlobj_get_direct on that version's element.  Because the lookup goes through xmlobj_ver,
   a version that does not exist yet is created as a side effect. */
XMLAPI char * xmlobj_getver     (XML * obj, XML * class, const char * field, const char * ver)
{
   return xmlobj_get_direct (xmlobj_ver (obj, class, field, ver));
}
/* xmlobj_curver: returns the "ver" attribute of the named field, i.e. the id of the
   version the field currently points at.  The field is located with xmlobj_field. */
XMLAPI const char * xmlobj_curver (XML * obj, XML * class, const char * field)
{
   return xml_attrval (xmlobj_field (obj, class, field), "ver");
}
/* xmlobj_setver: records ver as the current version of the named field (its "ver" attribute)
   and returns the matching "ver" element, which xmlobj_ver_direct creates if necessary. */
XMLAPI XML  * xmlobj_setver     (XML * obj, XML * class, const char * field, const char * ver)
{
   XML * target = xmlobj_field (obj, class, field);
   XML * version;

   xml_set (target, "ver", ver);
   version = xmlobj_ver_direct (target, ver);
   return version;
}
/* xmlobj_newver: locates the named field with xmlobj_field and starts a new version of it
   via xmlobj_newver_direct, returning the new version's element. */
XMLAPI XML * xmlobj_newver (XML * obj, XML * class, const char * field)
{
   return xmlobj_newver_direct (xmlobj_field (obj, class, field));
}
/* xmlobj_newver_direct: increments the numeric "ver" attribute of field by one, then returns
   the "ver" element carrying that new number as its id (created if it does not exist). */
XMLAPI XML * xmlobj_newver_direct (XML * field)
{
   const char * current;

   xml_setnum (field, "ver", xml_attrvalnum (field, "ver") + 1);
   current = xml_attrval (field, "ver");
   return xmlobj_ver_direct (field, current);
}

(December 30, 2002) So my latest trick is to sort lists of xmlobj records. This is actually a lot easier than it looks, since qsort is available to do the work of sorting -- all we have to do is to compare two records. Easy! And of course most of the work was already done last year, when I wrote the xml_sort function for the XMLAPI. This is just a little more structured, so that it can take advantage of knowledge about field types and such.

The "order" parameter is simply a string in the SQL standard format for the order clause. Thus we can sort on "priority desc, entry_date" with no further ado. The function first parses the order specifier into a sort description, and then goes on to build the sort array and call qsort.

/* One entry of the array handed to qsort by xmlobj_list_sort.  Every entry carries the same
   sort description and class, so that the comparison function can reach them from either
   of its two arguments. */
struct _xmlobj_sort_hdl {
  XML * sort;   /* parsed sort description: one "field" element per sort key */
  XML * class;  /* class passed to xmlobj_list_sort (may be NULL) */
  XML * elem;   /* the list child this entry stands for */
};
int _xmlobj_sort_comparison (const void * a, const void * b);

/* xmlobj_list_sort: sorts the child elements of list in place.
     list  - element whose child elements are the records to sort
     class - optional class definition; when given, each sort key not starting with '?' or '!'
             must name a "field" of the class, and that field's "type" is used for comparison
     order - SQL-style order clause, e.g. "priority desc, entry_date"
   Returns list on success (including the case of fewer than two children, where nothing
   needs doing).  On failure returns NULL and describes the problem in the "error-state"
   attribute of list; that attribute is reset to "" on entry. */
XMLAPI XML * xmlobj_list_sort (XML * list, XML * class, const char * order)
{
   int i;
   XML * child;
   XML * sort = xml_create ("sort");
   XML * sortelem;
   XML * field;
   const char * mark;
   const char * mark2;
   struct _xmlobj_sort_hdl * array;
   ELEMENTLIST * elist;

   xml_set (list, "error-state", "");

   /* Parse the order specifier: one "field" element per comma-separated key. */
   mark = strchr (order, ',');
   while (mark) {
      sortelem = xml_create ("field");
      xml_set (sortelem, "id", "");
      xml_attrncat (sortelem, "id", order, mark - order);
      xml_append (sort, sortelem);
      order = mark + 1;
      while (*order == ' ') order++;

      mark = strchr (order, ',');
   }
   sortelem = xml_create ("field");
   xml_set (sortelem, "id", order);
   xml_append (sort, sortelem);

   /* Scan the order elements to fix up asc/desc and retrieve types. */
   sortelem = xml_firstelem (sort);
   while (sortelem) {
      xml_set (sortelem, "dir", "asc");
      mark = strchr (xml_attrval (sortelem, "id"), ' ');
      if (mark) {
         /* Everything after the first blank, minus leading blanks, is the direction.  The
            blanks are skipped *before* calling xml_set so that xml_set is never handed a
            pointer into the very attribute value it is about to replace. */
         mark2 = mark + 1;
         while (*mark2 == ' ') mark2++;
         xml_set (sortelem, "dir", mark2);

         /* Cut the id down to the part before the blank, by way of a scratch attribute. */
         xml_set (sortelem, "newid", "");
         xml_attrncat (sortelem, "newid", xml_attrval (sortelem, "id"), mark - xml_attrval (sortelem, "id"));
         xml_set (sortelem, "id", xml_attrval (sortelem, "newid"));
         xml_unset (sortelem, "newid");

         if (strcmp (xml_attrval (sortelem, "dir"), "asc") && strcmp (xml_attrval (sortelem, "dir"), "desc")) {
            xml_setf (list, "error-state", "Unknown sort direction '%s' specified", xml_attrval (sortelem, "dir"));
            xml_free (sort);
            return (NULL);
         }
      }

      if (class && *xml_attrval (sortelem, "id") != '?' && *xml_attrval (sortelem, "id") != '!') {
         field = xml_search (class, "field", "id", xml_attrval (sortelem, "id"));
         if (!field) {
            xml_setf (list, "error-state", "Unknown field '%s' specified", xml_attrval (sortelem, "id"));
            xml_free (sort);
            return (NULL);
         }

         xml_set (sortelem, "type", xml_attrval (field, "type"));
      }

      /* A leading '?' only suppresses the class check above; drop it from the id.  Done through
         the scratch attribute for the same aliasing reason as above.  (A leading '!' is left
         in place.) */
      if (*xml_attrval (sortelem, "id") == '?') {
         xml_set (sortelem, "newid", xml_attrval (sortelem, "id") + 1);
         xml_set (sortelem, "id", xml_attrval (sortelem, "newid"));
         xml_unset (sortelem, "newid");
      }

      sortelem = xml_nextelem (sortelem);
   }

   /* Count the children. */
   i=0; child = xml_firstelem (list);
   while (child) {
      i++;
      child = xml_nextelem (child);
   }
   if (i < 2) {
      xml_free (sort);
      return list;
   }

   /* Build the array. */
   array = (struct _xmlobj_sort_hdl *) malloc ((size_t) i * sizeof (struct _xmlobj_sort_hdl));
   if (!array) {
      xml_set (list, "error-state", "Out of memory");
      xml_free (sort);
      return (NULL);
   }
   i=0; child = xml_firstelem (list);
   while (child) {
      array[i].sort = sort;
      array[i].class = class;
      array[i].elem = child;
      i++;
      child = xml_nextelem (child);
   }

   /* Sort the array. */
   qsort (array, i, sizeof (struct _xmlobj_sort_hdl), _xmlobj_sort_comparison);

   /* Rearrange the children, being very slick about it: the child-list nodes stay where they
      are, and only the element pointers in the nodes whose element has a name are rewritten
      in sorted order.  (Presumably the unnamed entries are text/whitespace nodes, which thus
      keep their positions -- confirm against the XMLAPI.) */
   i = 0; elist = list->children;
   while (elist) {
      if (elist->element->name) {
         elist->element = array[i].elem;
         i++;
      }
      elist = elist->next;
   }

   free ((void *) array);
   xml_free (sort);

   return list;
}

The comparison function is only slightly more involved than that in the XMLAPI, since it scans the sort elements instead of being limited to a single sort value.

int _xmlobj_sort_comparison (const void * a, const void * b)
{
   XML * sort;
   int res;
   int ia, ib;
   char * ca, * cb;
   struct _xmlobj_sort_hdl * _a = (struct _xmlobj_sort_hdl *) a;
   struct _xmlobj_sort_hdl * _b = (struct _xmlobj_sort_hdl *) b;

   if (a == b) return 0;

   sort = xml_firstelem (_a->sort);
   while (sort) {
      if (!strcmp (xml_attrval (sort, "type"), "numeric")) {
         ia = xmlobj_getnum (_a->elem, _a->class, xml_attrval (sort, "id"));
         ib = xmlobj_getnum (_b->elem, _b->class, xml_attrval (sort, "id"));
         res = 0;
         if (ia < ib) res = -1;
         if (ia > ib) res = 1;
      } else {
         ca = xmlobj_get (_a->elem, _a->class, xml_attrval (sort, "id"));
         cb = xmlobj_get (_b->elem, _b->class, xml_attrval (sort, "id"));
         if (!ca) ca = strdup ("");
         if (!cb) cb = strdup ("");
         res = strcmp (ca, cb);
         free (ca);
         free (cb);
      }

      if (!strcmp (xml_attrval (sort, "dir"), "desc")) res = - res;
      if (res) return res;

      sort = xml_nextelem (sort);
   }
   return 0;
}

[ Previous: xmlobj include file ] [ Top: xmlobj library ] [ Next: XML object storage ]

This code and documentation are released under the terms of the GNU license. They are copyright (c) 2003, Vivtek. All rights reserved except those explicitly granted under the terms of the GNU license.