""" The module provides two classes encoder and decoder that allow to serialize and deserialize python-ish PODs into/from XML. Note that data should be simple: None, True, False, strings, lists, tupls, dicts Anything other than this will trigger an error. Also note that any circular references in the data will also trigger an error, so please do not try to serialize something like: >>> a = [] >>> a.append(a) >>> a [[...]] Important notes: - tuples are treated as lists and deserialized into lists. - any empty list, tuple or dictionary is deserialized into None. TODO: Expand the notes on how exactly the data values are serialized. Some doctests: >>> def test_enc_dec(data, return_res=False): ... from xml.dom.minidom import parseString ... doc = parseString('') ... encoder().serialize(data, doc.documentElement) ... xml = doc.toprettyxml(' ') ... data2 = decoder().deserialize(doc.documentElement) ... if data2 != data: ... msg = '''--- Expected: --- ... %s ... --- Got: --- ... %s ... === Xml: === ... %s ... ''' % (data, data2, xml) ... if return_res: ... return data2 ... print msg >>> test_enc_dec(None) >>> test_enc_dec(True) >>> test_enc_dec(False) >>> test_enc_dec('string') >>> test_enc_dec(u'string') >>> test_enc_dec({'a':'b'}) >>> test_enc_dec([1,2]) >>> test_enc_dec(['1',2]) >>> test_enc_dec([1]) >>> test_enc_dec({'':'aa'}) >>> test_enc_dec(['_']) >>> test_enc_dec(['aa',['bb','cc'],[None], None, ['_']]) >>> test_enc_dec([[False]]) >>> test_enc_dec([[False], None]) >>> test_enc_dec([False, True, [False], [[True]], [None]]) >>> test_enc_dec({'vasya':['aa', 'bb']}) >>> test_enc_dec({'name':['Peter', 'Mary'], 'age':[11, 15]}) To fix: >>> test_enc_dec([], return_res=True) != None False >>> test_enc_dec({}, return_res=True) != None False """ TRUE_LABEL = u'True' FALSE_LABEL = u'False' class decoder: def deserialize(self, node): """ >>> from xml.dom.minidom import parseString >>> doc = parseString('toaat!!') >>> decoder().deserialize(doc.documentElement) {u'I': {None: [u'to'], u'want': None}, u'spend': {None: [u'a', u'at', u'!!'], u'Maui': None, u'month': {u'or': u'two'}}} """ data = self._decode_into_dict(node) return data def _reduce_list(self, l): if not isinstance(l, list): return l if len(l) == 0: return l if len(l) == 1: return l[0] if l[-1] is None: return l[:-1] return l def _reduce_diction(self, diction): # None value if len(diction) == 0: return None # Strings, booleans and None values if len(diction) == 1 and None in diction: if len(diction[None]) == 1: return diction[None][0] return diction[None] # Lists if len(diction) == 1 and '_' in diction: return self._reduce_list(diction['_']) data = {} for key in diction.keys(): if key is None: data[None] = diction[None] else: data[decoder._decode_tag(key)] = self._reduce_list(diction[key]) # elif data '_' diction = data return diction @classmethod def _decode_tag(clazz, tag): if len(tag) > 1 and tag[0:2] == '__': return tag[2:] return tag def _decode_into_dict(self, node): diction = {None:[]} for child in node.childNodes: if child.nodeType is child.TEXT_NODE or child.nodeType == child.CDATA_SECTION_NODE: diction[None].append(decoder._decode_string(child.data)) elif node.nodeType is child.ELEMENT_NODE: data = self._decode_into_dict(child) self._add_to_dict(diction, child.tagName, data) else: #TODO !! pass for attr in node.attributes.keys(): data = decoder._decode_string(node.attributes[attr].nodeValue) self._add_to_dict(diction, attr, data) if len(diction[None]) == 0: del diction[None] return self._reduce_diction(diction) def _add_to_dict(self, diction, key, data): if key not in diction: diction[key] = [data] else: # if not isinstance(diction[key], list): # diction[key] = [diction[key]] diction[key].append(data) @classmethod def _decode_string(clazz, str): """ >>> decoder._decode_string(None) >>> decoder._decode_string('True') True >>> decoder._decode_string('False') False >>> decoder._decode_string('11') 11 >>> decoder._decode_string('12L') 12L >>> decoder._decode_string('11.') 11.0 >>> decoder._decode_string('some') u'some' >>> decoder._decode_string('"some"') u'"some"' >>> decoder._decode_string('"some') u'"some' """ if str is None: return None elif str == TRUE_LABEL: return True elif str == FALSE_LABEL: return False try: return int(str) except Exception:pass try: return long(str) except Exception:pass try: return float(str) except Exception:pass str = unicode(str) if str[0] == '"' and str[-1] == '"': original = (str.replace('\\"', '"'))[1:-1] if encoder._escape_string(original) == str: return original return unicode(str) class encoder: def serialize(self, data, xml_node): self.__doc = xml_node.ownerDocument self.__markers = {} self._encode(data=data, node=xml_node) @classmethod def _encode_tag(clazz, tag): return '__' + tag def _create_element(self, tag): # TODO Account for wierd characters return self.__doc.createElement(tag) def _create_text(self, value): return self.__doc.createTextNode(value) @classmethod def _escape_string(clazz, str): if str.find('"') < 0: if str != TRUE_LABEL and str != FALSE_LABEL: try: int(str) except: try: long(str) except: try: float(str) except: # Great - the string won't be confused with int, long, # float or boolean - just spit it out then. return str # Ok, do the safe escaping of the string value return '"' + str.replace('"', '\\"') + '"' def _encode(self, data, node): """ @param node Is either a string or an XML node. If its a string then a node with such a name should be created, otherwise the existing xml node should be populated. """ if isinstance(data, (list, tuple)): self.__mark(data) children = [] if isinstance(node, basestring): tag = encoder._encode_tag(node) parent = None else: tag = '_' parent = node l = list(data) if len(l) >= 1: l.append(None) for d in l: child = self._create_element(tag) if parent is not None: parent.appendChild(child) self._encode(d, child) children.append(child) return children else: if isinstance(node, basestring): parent = self._create_element(encoder._encode_tag(node)) else: parent = node if isinstance(data, dict): self.__mark(data) for key in data.keys(): children = self._encode(data[key], key) if isinstance(children, list): for child in children: parent.appendChild(child) else: parent.appendChild(children) self.__unmark(data) else: if isinstance(data, basestring): child = self._create_text(encoder._escape_string(unicode(data))) elif data is None: child = None elif isinstance(data, (int, long, float)): child = self._create_text(unicode(data)) elif data is True: child = self._create_text(TRUE_LABEL) elif data is False: child = self._create_text(FALSE_LABEL) else: raise ValueError('Serialisation of "%s" is not supported.' % (data.__class__,)) if child is not None: parent.appendChild(child) return [parent] def __mark(self, obj): if id(obj) in self.__markers: raise ValueError('gchecky.encoder can\'t handle cyclic references.') self.__markers[id(obj)] = obj def __unmark(self, obj): del self.__markers[id(obj)] if __name__ == "__main__": def run_doctests(): import doctest doctest.testmod() run_doctests()