1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378 |
- # file llsd.py
- #
- # $LicenseInfo:firstyear=2006&license=mit$
- #
- # Copyright (c) 2006-2009, Linden Research, Inc.
- #
- # Permission is hereby granted, free of charge, to any person obtaining a copy
- # of this software and associated documentation files (the "Software"), to deal
- # in the Software without restriction, including without limitation the rights
- # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- # copies of the Software, and to permit persons to whom the Software is
- # furnished to do so, subject to the following conditions:
- #
- # The above copyright notice and this permission notice shall be included in
- # all copies or substantial portions of the Software.
- #
- # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- # THE SOFTWARE.
- # $/LicenseInfo$
- """
- Types as well as parsing and formatting functions for handling LLSD.
- This is the llsd module -- parsers and formatters between the
- supported subset of mime types and python objects. Documentation
- available on the Second Life wiki:
- http://wiki.secondlife.com/wiki/LLSD
- """
- from __future__ import absolute_import
- from __future__ import division
- import sys
- import base64
- import binascii
- import calendar
- import datetime
- import re
- import struct
- import time
- import types
- import uuid
- import os
- from .fastest_elementtree import ElementTreeError, fromstring
- PY2 = sys.version_info[0] == 2
- XML_MIME_TYPE = 'application/llsd+xml'
- BINARY_MIME_TYPE = 'application/llsd+binary'
- NOTATION_MIME_TYPE = 'application/llsd+notation'
- class LLSDParseError(Exception):
- "Exception raised when the parser fails."
- pass
- class LLSDSerializationError(TypeError):
- "Exception raised when serialization fails."
- pass
- if PY2:
- class binary(str):
- "Simple wrapper for llsd.binary data."
- pass
- else:
- binary = bytes
- class uri(str):
- "Simple wrapper for llsd.uri data."
- pass
- # In Python 2, this expression produces (str, unicode); in Python 3 it's
- # simply (str,). Either way, it's valid to test isinstance(somevar,
- # StringTypes). (Some consumers test (type(somevar) in StringTypes), so we do
- # want (str,) rather than plain str.)
- StringTypes = tuple(set((type(''), type(u''))))
- try:
- LongType = long
- IntTypes = (int, long)
- except NameError:
- LongType = int
- IntTypes = int
- try:
- UnicodeType = unicode
- except NameError:
- UnicodeType = str
- # can't just check for NameError: 'bytes' is defined in both Python 2 and 3
- if PY2:
- BytesType = str
- else:
- BytesType = bytes
- try:
- b'%s' % (b'yes',)
- except TypeError:
- # There's a range of Python 3 versions, up through Python 3.4, for which
- # bytes interpolation (bytes value with % operator) does not work. This
- # hack can be removed once we no longer care about Python 3.4 -- in other
- # words, once we're beyond jessie everywhere.
- class B(object):
- """
- Instead of writing:
- b'format string' % stuff
- write:
- B('format string') % stuff
- This class performs the conversions necessary to support bytes
- interpolation when the language doesn't natively support it.
- (We considered naming this class b, but that would be too confusing.)
- """
- def __init__(self, fmt):
- # Instead of storing the format string as bytes and converting it
- # to string every time, convert initially and store the string.
- try:
- self.strfmt = fmt.decode('utf-8')
- except AttributeError:
- # caller passed a string literal rather than a bytes literal
- self.strfmt = fmt
- def __mod__(self, args):
- # __mod__() is engaged for (self % args)
- if not isinstance(args, tuple):
- # Unify the tuple and non-tuple cases.
- args = (args,)
- # In principle, this is simple: convert everything to string,
- # interpolate, convert back. It's complicated by the fact that we
- # must handle non-bytes args.
- strargs = []
- for arg in args:
- try:
- decoder = arg.decode
- except AttributeError:
- # use arg exactly as is
- strargs.append(arg)
- else:
- # convert from bytes to string
- strargs.append(decoder('utf-8'))
- return (self.strfmt % tuple(strargs)).encode('utf-8')
- else:
- # bytes interpolation Just Works
- def B(fmt):
- try:
- # In the usual case, caller wrote B('fmt') rather than b'fmt'. But
- # s/he really wants a bytes literal here. Encode the passed string.
- return fmt.encode('utf-8')
- except AttributeError:
- # Caller wrote B(b'fmt')?
- return fmt
- def is_integer(o):
- """ portable test if an object is like an int """
- return isinstance(o, IntTypes)
- def is_unicode(o):
- """ portable check if an object is unicode and not bytes """
- return isinstance(o, UnicodeType)
- def is_string(o):
- """ portable check if an object is string-like """
- return isinstance(o, StringTypes)
- def is_bytes(o):
- """ portable check if an object is an immutable byte array """
- return isinstance(o, BytesType)
- _int_regex = re.compile(br"[-+]?\d+")
- _real_regex = re.compile(br"[-+]?(?:(\d+(\.\d*)?|\d*\.\d+)([eE][-+]?\d+)?)|[-+]?inf|[-+]?nan")
- _alpha_regex = re.compile(br"[a-zA-Z]+")
- _true_regex = re.compile(br"TRUE|true|\b[Tt]\b")
- _false_regex = re.compile(br"FALSE|false|\b[Ff]\b")
- _date_regex = re.compile(r"(?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2})T"
- r"(?P<hour>\d{2}):(?P<minute>\d{2}):(?P<second>\d{2})"
- r"(?P<second_float>(\.\d+)?)Z")
- #date: d"YYYY-MM-DDTHH:MM:SS.FFFFFFZ"
- def _str_to_bytes(s):
- if is_unicode(s):
- return s.encode('utf-8')
- else:
- return s
- def _format_datestr(v):
- """
- Formats a datetime or date object into the string format shared by
- xml and notation serializations.
- """
- if not isinstance(v, datetime.date) and not isinstance(v, datetime.datetime):
- raise LLSDParseError("invalid date string %s passed to date formatter" % s)
- if not isinstance(v, datetime.datetime):
- v = datetime.datetime.combine(v, datetime.time(0))
- return _str_to_bytes(v.isoformat() + 'Z')
- def _parse_datestr(datestr):
- """
- Parses a datetime object from the string format shared by
- xml and notation serializations.
- """
- if datestr == "":
- return datetime.datetime(1970, 1, 1)
- match = re.match(_date_regex, datestr)
- if not match:
- raise LLSDParseError("invalid date string '%s'." % datestr)
- year = int(match.group('year'))
- month = int(match.group('month'))
- day = int(match.group('day'))
- hour = int(match.group('hour'))
- minute = int(match.group('minute'))
- second = int(match.group('second'))
- seconds_float = match.group('second_float')
- usec = 0
- if seconds_float:
- usec = int(float('0' + seconds_float) * 1e6)
- return datetime.datetime(year, month, day, hour, minute, second, usec)
- def _bool_to_python(node):
- "Convert boolean node to a python object."
- val = node.text or ''
- try:
- # string value, accept 'true' or 'True' or whatever
- return (val.lower() == 'true')
- except AttributeError:
- # not a string (no lower() method), use normal Python rules
- return bool(val)
- def _int_to_python(node):
- "Convert integer node to a python object."
- val = node.text or ''
- if not val.strip():
- return 0
- return int(val)
- def _real_to_python(node):
- "Convert floating point node to a python object."
- val = node.text or ''
- if not val.strip():
- return 0.0
- return float(val)
- def _uuid_to_python(node):
- "Convert uuid node to a python object."
- if node.text:
- return uuid.UUID(hex=node.text)
- return uuid.UUID(int=0)
- def _str_to_python(node):
- "Convert string node to a python object."
- return node.text or ''
- def _bin_to_python(node):
- base = node.get('encoding') or 'base64'
- try:
- if base == 'base16':
- # parse base16 encoded data
- return binary(base64.b16decode(node.text or ''))
- elif base == 'base64':
- # parse base64 encoded data
- return binary(base64.b64decode(node.text or ''))
- elif base == 'base85':
- return LLSDParseError("Parser doesn't support base85 encoding")
- except binascii.Error as exc:
- # convert exception class so it's more catchable
- return LLSDParseError("Encoded binary data: " + str(exc))
- except TypeError as exc:
- # convert exception class so it's more catchable
- return LLSDParseError("Bad binary data: " + str(exc))
- def _date_to_python(node):
- "Convert date node to a python object."
- val = node.text or ''
- if not val:
- val = "1970-01-01T00:00:00Z"
- return _parse_datestr(val)
- def _uri_to_python(node):
- "Convert uri node to a python object."
- val = node.text or ''
- return uri(val)
- def _map_to_python(node):
- "Convert map node to a python object."
- result = {}
- for index in range(len(node))[::2]:
- if node[index].text is None:
- result[''] = _to_python(node[index+1])
- else:
- result[node[index].text] = _to_python(node[index+1])
- return result
- def _array_to_python(node):
- "Convert array node to a python object."
- return [_to_python(child) for child in node]
- NODE_HANDLERS = dict(
- undef=lambda x: None,
- boolean=_bool_to_python,
- integer=_int_to_python,
- real=_real_to_python,
- uuid=_uuid_to_python,
- string=_str_to_python,
- binary=_bin_to_python,
- date=_date_to_python,
- uri=_uri_to_python,
- map=_map_to_python,
- array=_array_to_python,
- )
- def _to_python(node):
- "Convert node to a python object."
- return NODE_HANDLERS[node.tag](node)
- if PY2:
- ALL_CHARS = str(bytearray(range(256)))
- else:
- ALL_CHARS = bytes(range(256))
- INVALID_XML_BYTES = b'\x00\x01\x02\x03\x04\x05\x06\x07\x08\x0b\x0c'\
- b'\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18'\
- b'\x19\x1a\x1b\x1c\x1d\x1e\x1f'
- INVALID_XML_RE = re.compile(r'[\x00-\x08\x0b\x0c\x0e-\x1f]')
- def remove_invalid_xml_bytes(b):
- try:
- # Dropping chars that cannot be parsed later on. The
- # translate() function was benchmarked to be the fastest way
- # to do this.
- return b.translate(ALL_CHARS, INVALID_XML_BYTES)
- except TypeError:
- # we get here if s is a unicode object (should be limited to
- # unit tests)
- return INVALID_XML_RE.sub('', b)
- class LLSDBaseFormatter(object):
- """
- This base class cannot be instantiated on its own: it assumes a subclass
- containing methods with canonical names specified in self.__init__(). The
- role of this base class is to provide self.type_map based on the methods
- defined in its subclass.
- """
- def __init__(self):
- "Construct a new formatter dispatch table."
- self.type_map = {
- type(None) : self.UNDEF,
- bool : self.BOOLEAN,
- int : self.INTEGER,
- LongType : self.INTEGER,
- float : self.REAL,
- uuid.UUID : self.UUID,
- binary : self.BINARY,
- str : self.STRING,
- UnicodeType : self.STRING,
- uri : self.URI,
- datetime.datetime : self.DATE,
- datetime.date : self.DATE,
- list : self.ARRAY,
- tuple : self.ARRAY,
- types.GeneratorType : self.ARRAY,
- dict : self.MAP,
- LLSD : self.LLSD
- }
- class LLSDXMLFormatter(LLSDBaseFormatter):
- """
- Class which implements LLSD XML serialization..
- http://wiki.secondlife.com/wiki/LLSD#XML_Serialization
- This class wraps both a pure python and c-extension for formatting
- a limited subset of python objects as application/llsd+xml. You do
- not generally need to make an instance of this object since the
- module level format_xml is the most convenient interface to this
- functionality.
- """
- def _elt(self, name, contents=None):
- "Serialize a single element."
- if not contents:
- return B("<%s />") % (name,)
- else:
- return B("<%s>%s</%s>") % (name, _str_to_bytes(contents), name)
- def xml_esc(self, v):
- "Escape string or unicode object v for xml output"
- if is_string(v):
- # we need to drop these invalid characters because they
- # cannot be parsed (and encode() doesn't drop them for us)
- v = v.replace(u'\uffff', u'')
- v = v.replace(u'\ufffe', u'')
- v = v.encode('utf-8')
- v = remove_invalid_xml_bytes(v)
- return v.replace(b'&',b'&').replace(b'<',b'<').replace(b'>',b'>')
- def LLSD(self, v):
- return self._generate(v.thing)
- def UNDEF(self, _v):
- return self._elt(b'undef')
- def BOOLEAN(self, v):
- if v:
- return self._elt(b'boolean', b'true')
- else:
- return self._elt(b'boolean', b'false')
- def INTEGER(self, v):
- return self._elt(b'integer', str(v))
- def REAL(self, v):
- return self._elt(b'real', repr(v))
- def UUID(self, v):
- if v.int == 0:
- return self._elt(b'uuid')
- else:
- return self._elt(b'uuid', str(v))
- def BINARY(self, v):
- return self._elt(b'binary', base64.b64encode(v).strip())
- def STRING(self, v):
- return self._elt(b'string', self.xml_esc(v))
- def URI(self, v):
- return self._elt(b'uri', self.xml_esc(str(v)))
- def DATE(self, v):
- return self._elt(b'date', _format_datestr(v))
- def ARRAY(self, v):
- return self._elt(
- b'array',
- b''.join([self._generate(item) for item in v]))
- def MAP(self, v):
- return self._elt(
- b'map',
- b''.join([B("%s%s") % (self._elt(b'key', self.xml_esc(UnicodeType(key))),
- self._generate(value))
- for key, value in v.items()]))
- typeof = type
- def _generate(self, something):
- "Generate xml from a single python object."
- t = self.typeof(something)
- if t in self.type_map:
- return self.type_map[t](something)
- else:
- raise LLSDSerializationError(
- "Cannot serialize unknown type: %s (%s)" % (t, something))
- def _format(self, something):
- "Pure Python implementation of the formatter."
- return b'<?xml version="1.0" ?>' + self._elt(b"llsd", self._generate(something))
- def format(self, something):
- """
- Format a python object as application/llsd+xml
- :param something: A python object (typically a dict) to be serialized.
- :returns: Returns an XML formatted string.
- """
- return self._format(something)
- _g_xml_formatter = None
- def format_xml(something):
- """
- Format a python object as application/llsd+xml
- :param something: a python object (typically a dict) to be serialized.
- :returns: Returns an XML formatted string.
- Ssee http://wiki.secondlife.com/wiki/LLSD#XML_Serialization
- This function wraps both a pure python and c-extension for formatting
- a limited subset of python objects as application/llsd+xml.
- """
- global _g_xml_formatter
- if _g_xml_formatter is None:
- _g_xml_formatter = LLSDXMLFormatter()
- return _g_xml_formatter.format(something)
- class LLSDXMLPrettyFormatter(LLSDXMLFormatter):
- """
- Class which implements 'pretty' LLSD XML serialization..
- See http://wiki.secondlife.com/wiki/LLSD#XML_Serialization
- The output conforms to the LLSD DTD, unlike the output from the
- standard python xml.dom DOM::toprettyxml() method which does not
- preserve significant whitespace.
- This class is not necessarily suited for serializing very large objects.
- It sorts on dict (llsd map) keys alphabetically to ease human reading.
- """
- def __init__(self, indent_atom = None):
- "Construct a pretty serializer."
- # Call the super class constructor so that we have the type map
- super(LLSDXMLPrettyFormatter, self).__init__()
- # Override the type map to use our specialized formatters to
- # emit the pretty output.
- self.type_map[list] = self.PRETTY_ARRAY
- self.type_map[tuple] = self.PRETTY_ARRAY
- self.type_map[types.GeneratorType] = self.PRETTY_ARRAY,
- self.type_map[dict] = self.PRETTY_MAP
- # Private data used for indentation.
- self._indent_level = 1
- if indent_atom is None:
- self._indent_atom = b' '
- else:
- self._indent_atom = indent_atom
- def _indent(self):
- "Return an indentation based on the atom and indentation level."
- return self._indent_atom * self._indent_level
- def PRETTY_ARRAY(self, v):
- "Recursively format an array with pretty turned on."
- rv = []
- rv.append(b'<array>\n')
- self._indent_level = self._indent_level + 1
- rv.extend([B("%s%s\n") %
- (self._indent(),
- self._generate(item))
- for item in v])
- self._indent_level = self._indent_level - 1
- rv.append(self._indent())
- rv.append(b'</array>')
- return b''.join(rv)
- def PRETTY_MAP(self, v):
- "Recursively format a map with pretty turned on."
- rv = []
- rv.append(b'<map>\n')
- self._indent_level = self._indent_level + 1
- # list of keys
- keys = list(v)
- keys.sort()
- rv.extend([B("%s%s\n%s%s\n") %
- (self._indent(),
- self._elt(b'key', UnicodeType(key)),
- self._indent(),
- self._generate(v[key]))
- for key in keys])
- self._indent_level = self._indent_level - 1
- rv.append(self._indent())
- rv.append(b'</map>')
- return b''.join(rv)
- def format(self, something):
- """
- Format a python object as application/llsd+xml
- :param something: a python object (typically a dict) to be serialized.
- :returns: Returns an XML formatted string.
- """
- data = []
- data.append(b'<?xml version="1.0" ?>\n<llsd>')
- data.append(self._generate(something))
- data.append(b'</llsd>\n')
- return b'\n'.join(data)
- def format_pretty_xml(something):
- """
- Serialize a python object as 'pretty' application/llsd+xml.
- :param something: a python object (typically a dict) to be serialized.
- :returns: Returns an XML formatted string.
- See http://wiki.secondlife.com/wiki/LLSD#XML_Serialization
- The output conforms to the LLSD DTD, unlike the output from the
- standard python xml.dom DOM::toprettyxml() method which does not
- preserve significant whitespace.
- This function is not necessarily suited for serializing very large
- objects. It sorts on dict (llsd map) keys alphabetically to ease human
- reading.
- """
- return LLSDXMLPrettyFormatter().format(something)
- class LLSDNotationFormatter(LLSDBaseFormatter):
- """
- Serialize a python object as application/llsd+notation
- See http://wiki.secondlife.com/wiki/LLSD#Notation_Serialization
- """
- def LLSD(self, v):
- return self._generate(v.thing)
- def UNDEF(self, v):
- return b'!'
- def BOOLEAN(self, v):
- if v:
- return b'true'
- else:
- return b'false'
- def INTEGER(self, v):
- return B("i%d") % v
- def REAL(self, v):
- return B("r%r") % v
- def UUID(self, v):
- # latin-1 is the byte-to-byte encoding, mapping \x00-\xFF ->
- # \u0000-\u00FF. It's also the fastest encoding, I believe, from
- # https://docs.python.org/3/library/codecs.html#encodings-and-unicode
- # UUID doesn't like the hex to be a bytes object, so I have to
- # convert it to a string. I chose latin-1 to exactly match the old
- # error behavior in case someone passes an invalid hex string, with
- # things other than 0-9a-fA-F, so that they will fail in the UUID
- # decode, rather than with a UnicodeError.
- return B("u%s") % str(v).encode('latin-1')
- def BINARY(self, v):
- return b'b64"' + base64.b64encode(v).strip() + b'"'
- def STRING(self, v):
- return B("'%s'") % _str_to_bytes(v).replace(b"\\", b"\\\\").replace(b"'", b"\\'")
- def URI(self, v):
- return B('l"%s"') % _str_to_bytes(v).replace(b"\\", b"\\\\").replace(b'"', b'\\"')
- def DATE(self, v):
- return B('d"%s"') % _format_datestr(v)
- def ARRAY(self, v):
- return B("[%s]") % b','.join([self._generate(item) for item in v])
- def MAP(self, v):
- return B("{%s}") % b','.join([B("'%s':%s") % (_str_to_bytes(UnicodeType(key)).replace(b"\\", b"\\\\").replace(b"'", b"\\'"), self._generate(value))
- for key, value in v.items()])
- def _generate(self, something):
- "Generate notation from a single python object."
- t = type(something)
- handler = self.type_map.get(t)
- if handler:
- return handler(something)
- else:
- try:
- return self.ARRAY(iter(something))
- except TypeError:
- raise LLSDSerializationError(
- "Cannot serialize unknown type: %s (%s)" % (t, something))
- def format(self, something):
- """
- Format a python object as application/llsd+notation
- :param something: a python object (typically a dict) to be serialized.
- :returns: Returns a LLSD notation formatted string.
- """
- return self._generate(something)
- def format_notation(something):
- """
- Format a python object as application/llsd+notation
- :param something: a python object (typically a dict) to be serialized.
- :returns: Returns a LLSD notation formatted string.
- See http://wiki.secondlife.com/wiki/LLSD#Notation_Serialization
- """
- return LLSDNotationFormatter().format(something)
- def _hex_as_nybble(hex):
- "Accepts a single hex character and returns a nybble."
- if (hex >= b'0') and (hex <= b'9'):
- return ord(hex) - ord(b'0')
- elif (hex >= b'a') and (hex <=b'f'):
- return 10 + ord(hex) - ord(b'a')
- elif (hex >= b'A') and (hex <=b'F'):
- return 10 + ord(hex) - ord(b'A')
- else:
- raise LLSDParseError('Invalid hex character: %s' % hex)
- class LLSDBaseParser(object):
- """
- Utility methods useful for parser subclasses.
- """
- def __init__(self):
- self._buffer = b''
- self._index = 0
- def _error(self, message, offset=0):
- try:
- byte = self._buffer[self._index+offset]
- except IndexError:
- byte = None
- raise LLSDParseError("%s at byte %d: %s" % (message, self._index+offset, byte))
- def _peek(self, num=1):
- if num < 0:
- # There aren't many ways this can happen. The likeliest is that
- # we've just read garbage length bytes from a binary input string.
- # We happen to know that lengths are encoded as 4 bytes, so back
- # off by 4 bytes to try to point the user at the right spot.
- self._error("Invalid length field %d" % num, -4)
- if self._index + num > len(self._buffer):
- self._error("Trying to read past end of buffer")
- return self._buffer[self._index:self._index + num]
- def _getc(self, num=1):
- chars = self._peek(num)
- self._index += num
- return chars
- # map char following escape char to corresponding character
- _escaped = {
- b'a': b'\a',
- b'b': b'\b',
- b'f': b'\f',
- b'n': b'\n',
- b'r': b'\r',
- b't': b'\t',
- b'v': b'\v',
- }
- def _parse_string_delim(self, delim):
- "Parse a delimited string."
- parts = bytearray()
- found_escape = False
- found_hex = False
- found_digit = False
- byte = 0
- while True:
- cc = self._getc()
- if found_escape:
- if found_hex:
- if found_digit:
- found_escape = False
- found_hex = False
- found_digit = False
- byte <<= 4
- byte |= _hex_as_nybble(cc)
- parts.append(byte)
- byte = 0
- else:
- found_digit = True
- byte = _hex_as_nybble(cc)
- elif cc == b'x':
- found_hex = True
- else:
- found_escape = False
- # escape char preceding anything other than the chars in
- # _escaped just results in that same char without the
- # escape char
- parts.extend(self._escaped.get(cc, cc))
- elif cc == b'\\':
- found_escape = True
- elif cc == delim:
- break
- else:
- parts.extend(cc)
- try:
- return parts.decode('utf-8')
- except UnicodeDecodeError as exc:
- self._error(exc)
- class LLSDBinaryParser(LLSDBaseParser):
- """
- Parse application/llsd+binary to a python object.
- See http://wiki.secondlife.com/wiki/LLSD#Binary_Serialization
- """
- def __init__(self):
- super(LLSDBinaryParser, self).__init__()
- # One way of dispatching based on the next character we see would be a
- # dict lookup, and indeed that's the best way to express it in source.
- _dispatch_dict = {
- b'{': self._parse_map,
- b'[': self._parse_array,
- b'!': lambda: None,
- b'0': lambda: False,
- b'1': lambda: True,
- # 'i' = integer
- b'i': lambda: struct.unpack("!i", self._getc(4))[0],
- # 'r' = real number
- b'r': lambda: struct.unpack("!d", self._getc(8))[0],
- # 'u' = uuid
- b'u': lambda: uuid.UUID(bytes=self._getc(16)),
- # 's' = string
- b's': self._parse_string,
- # delimited/escaped string
- b"'": lambda: self._parse_string_delim(b"'"),
- b'"': lambda: self._parse_string_delim(b'"'),
- # 'l' = uri
- b'l': lambda: uri(self._parse_string()),
- # 'd' = date in seconds since epoch
- b'd': self._parse_date,
- # 'b' = binary
- # *NOTE: if not self._keep_binary, maybe have a binary placeholder
- # which has the length.
- b'b': lambda: binary(self._parse_string_raw()) if self._keep_binary else None,
- }
- # But in fact it should be even faster to construct a list indexed by
- # ord(char). Start by filling it with the 'else' case. Use offset=-1
- # because by the time we perform this lookup, we've scanned past the
- # lookup char.
- self._dispatch = 256*[lambda: self._error("invalid binary token", -1)]
- # Now use the entries in _dispatch_dict to set the corresponding
- # entries in _dispatch.
- for c, func in _dispatch_dict.items():
- self._dispatch[ord(c)] = func
- def parse(self, buffer, ignore_binary = False):
- """
- This is the basic public interface for parsing.
- :param buffer: the binary data to parse in an indexable sequence.
- :param ignore_binary: parser throws away data in llsd binary nodes.
- :returns: returns a python object.
- """
- self._buffer = buffer
- self._index = 0
- self._keep_binary = not ignore_binary
- try:
- return self._parse()
- except struct.error as exc:
- self._error(exc)
- def _parse(self):
- "The actual parser which is called recursively when necessary."
- cc = self._getc()
- try:
- func = self._dispatch[ord(cc)]
- except IndexError:
- self._error("invalid binary token", -1)
- else:
- return func()
- def _parse_map(self):
- "Parse a single llsd map"
- rv = {}
- size = struct.unpack("!i", self._getc(4))[0]
- count = 0
- cc = self._getc()
- key = b''
- while (cc != b'}') and (count < size):
- if cc == b'k':
- key = self._parse_string()
- elif cc in (b"'", b'"'):
- key = self._parse_string_delim(cc)
- else:
- self._error("invalid map key", -1)
- value = self._parse()
- rv[key] = value
- count += 1
- cc = self._getc()
- if cc != b'}':
- self._error("invalid map close token")
- return rv
- def _parse_array(self):
- "Parse a single llsd array"
- rv = []
- size = struct.unpack("!i", self._getc(4))[0]
- count = 0
- cc = self._peek()
- while (cc != b']') and (count < size):
- rv.append(self._parse())
- count += 1
- cc = self._peek()
- if cc != b']':
- self._error("invalid array close token")
- self._index += 1
- return rv
- def _parse_string(self):
- try:
- return self._parse_string_raw().decode('utf-8')
- except UnicodeDecodeError as exc:
- self._error(exc)
- def _parse_string_raw(self):
- "Parse a string which has the leadings size indicator"
- try:
- size = struct.unpack("!i", self._getc(4))[0]
- except struct.error as exc:
- # convert exception class for client convenience
- self._error("struct " + str(exc))
- rv = self._getc(size)
- return rv
- def _parse_date(self):
- seconds = struct.unpack("<d", self._getc(8))[0]
- try:
- return datetime.datetime.utcfromtimestamp(seconds)
- except OverflowError as exc:
- # A garbage seconds value can cause utcfromtimestamp() to raise
- # OverflowError: timestamp out of range for platform time_t
- self._error(exc, -8)
- class LLSDNotationParser(LLSDBaseParser):
- """
- Parse LLSD notation.
- See http://wiki.secondlife.com/wiki/LLSD#Notation_Serialization
- * map: { string:object, string:object }
- * array: [ object, object, object ]
- * undef: !
- * boolean: true | false | 1 | 0 | T | F | t | f | TRUE | FALSE
- * integer: i####
- * real: r####
- * uuid: u####
- * string: "g\'day" | 'have a "nice" day' | s(size)"raw data"
- * uri: l"escaped"
- * date: d"YYYY-MM-DDTHH:MM:SS.FFZ"
- * binary: b##"ff3120ab1" | b(size)"raw data"
- """
- def __init__(self):
- super(LLSDNotationParser, self).__init__()
- # Like LLSDBinaryParser, we want to dispatch based on the current
- # character.
- _dispatch_dict = {
- # map
- b'{': self._parse_map,
- # array
- b'[': self._parse_array,
- # undefined -- have to eat the '!'
- b'!': lambda: self._skip_then(None),
- # false -- have to eat the '0'
- b'0': lambda: self._skip_then(False),
- # true -- have to eat the '1'
- b'1': lambda: self._skip_then(True),
- # false, must check for F|f|false|FALSE
- b'F': lambda: self._get_re("'false'", _false_regex, False),
- b'f': lambda: self._get_re("'false'", _false_regex, False),
- # true, must check for T|t|true|TRUE
- b'T': lambda: self._get_re("'true'", _true_regex, True),
- b't': lambda: self._get_re("'true'", _true_regex, True),
- # 'i' = integer
- b'i': self._parse_integer,
- # 'r' = real number
- b'r': self._parse_real,
- # 'u' = uuid
- b'u': self._parse_uuid,
- # string
- b"'": self._parse_string,
- b'"': self._parse_string,
- b's': self._parse_string,
- # 'l' = uri
- b'l': self._parse_uri,
- # 'd' = date in seconds since epoch
- b'd': self._parse_date,
- # 'b' = binary
- b'b': self._parse_binary,
- }
- # Like LLSDBinaryParser, construct a lookup list from this dict. Start
- # by filling with the 'else' case.
- self._dispatch = 256*[lambda: self._error("Invalid notation token")]
- # Then fill in specific entries based on the dict above.
- for c, func in _dispatch_dict.items():
- self._dispatch[ord(c)] = func
- def parse(self, buffer, ignore_binary = False):
- """
- This is the basic public interface for parsing.
- :param buffer: the notation string to parse.
- :param ignore_binary: parser throws away data in llsd binary nodes.
- :returns: returns a python object.
- """
- if buffer == b"":
- return False
- self._buffer = buffer
- self._index = 0
- return self._parse()
- def _get_until(self, delim):
- start = self._index
- end = self._buffer.find(delim, start)
- if end == -1:
- return None
- else:
- self._index = end + 1
- return self._buffer[start:end]
- def _skip_then(self, value):
- # We've already _peek()ed at the current character, which is how we
- # decided to call this method. Skip past it and return constant value.
- self._getc()
- return value
- def _get_re(self, desc, regex, override=None):
- match = re.match(regex, self._buffer[self._index:])
- if not match:
- self._error("Invalid %s token" % desc)
- else:
- self._index += match.end()
- return override if override is not None else match.group(0)
- def _parse(self):
- "The notation parser workhorse."
- cc = self._peek()
- try:
- func = self._dispatch[ord(cc)]
- except IndexError:
- # output error if the token was out of range
- self._error("Invalid notation token")
- else:
- return func()
- def _parse_binary(self):
- "parse a single binary object."
- self._getc() # eat the beginning 'b'
- cc = self._peek()
- if cc == b'(':
- # parse raw binary
- paren = self._getc()
- # grab the 'expected' size of the binary data
- size = self._get_until(b')')
- if size == None:
- self._error("Invalid binary size")
- size = int(size)
- # grab the opening quote
- q = self._getc()
- if q != b'"':
- self._error('Expected " to start binary value')
- # grab the data
- data = self._getc(size)
- # grab the closing quote
- q = self._getc()
- if q != b'"':
- self._error('Expected " to end binary value')
- return binary(data)
- else:
- # get the encoding base
- base = self._getc(2)
- try:
- decoder = {
- b'16': base64.b16decode,
- b'64': base64.b64decode,
- }[base]
- except KeyError:
- self._error("Parser doesn't support base %s encoding" %
- base.decode('latin-1'))
- # grab the double quote
- q = self._getc()
- if q != b'"':
- self._error('Expected " to start binary value')
- # grab the encoded data
- encoded = self._get_until(q)
- try:
- return binary(decoder(encoded or b''))
- except binascii.Error as exc:
- # convert exception class so it's more catchable
- self._error("Encoded binary data: " + str(exc))
- except TypeError as exc:
- # convert exception class so it's more catchable
- self._error("Bad binary data: " + str(exc))
- def _parse_map(self):
- """
- parse a single map
- map: { string:object, string:object }
- """
- rv = {}
- key = b''
- found_key = False
- self._getc() # eat the beginning '{'
- cc = self._peek()
- while (cc != b'}'):
- if cc is None:
- self._error("Unclosed map")
- if not found_key:
- if cc in (b"'", b'"', b's'):
- key = self._parse_string()
- found_key = True
- elif cc.isspace() or cc == b',':
- self._getc() # eat the character
- pass
- else:
- self._error("Invalid map key")
- elif cc.isspace():
- self._getc() # eat the space
- pass
- elif cc == b':':
- self._getc() # eat the ':'
- value = self._parse()
- rv[key] = value
- found_key = False
- else:
- self._error("missing separator")
- cc = self._peek()
- if self._getc() != b'}':
- self._error("Invalid map close token")
- return rv
- def _parse_array(self):
- """
- parse a single array.
- array: [ object, object, object ]
- """
- rv = []
- self._getc() # eat the beginning '['
- cc = self._peek()
- while (cc != b']'):
- if cc is None:
- self._error('Unclosed array')
- if cc.isspace() or cc == b',':
- self._getc()
- cc = self._peek()
- continue
- rv.append(self._parse())
- cc = self._peek()
- if self._getc() != b']':
- self._error("Invalid array close token")
- return rv
- def _parse_uuid(self):
- "Parse a uuid."
- self._getc() # eat the beginning 'u'
- # see comment on LLSDNotationFormatter.UUID() re use of latin-1
- return uuid.UUID(hex=self._getc(36).decode('latin-1'))
- def _parse_uri(self):
- "Parse a URI."
- self._getc() # eat the beginning 'l'
- return uri(self._parse_string())
- def _parse_date(self):
- "Parse a date."
- self._getc() # eat the beginning 'd'
- datestr = self._parse_string()
- return _parse_datestr(datestr)
- def _parse_real(self):
- "Parse a floating point number."
- self._getc() # eat the beginning 'r'
- return float(self._get_re("real", _real_regex))
- def _parse_integer(self):
- "Parse an integer."
- self._getc() # eat the beginning 'i'
- return int(self._get_re("integer", _int_regex))
- def _parse_string(self):
- """
- Parse a string
- string: "g\'day" | 'have a "nice" day' | s(size)"raw data"
- """
- rv = ""
- delim = self._peek()
- if delim in (b"'", b'"'):
- delim = self._getc() # eat the beginning delim
- rv = self._parse_string_delim(delim)
- elif delim == b's':
- rv = self._parse_string_raw()
- else:
- self._error("invalid string token")
- return rv
- def _parse_string_raw(self):
- """
- Parse a sized specified string.
- string: s(size)"raw data"
- """
- self._getc() # eat the beginning 's'
- # Read the (size) portion.
- cc = self._getc()
- if cc != b'(':
- self._error("Invalid string token")
- size = self._get_until(b')')
- if size == None:
- self._error("Invalid string size")
- size = int(size)
- delim = self._getc()
- if delim not in (b"'", b'"'):
- self._error("Invalid string token")
- rv = self._getc(size)
- cc = self._getc()
- if cc != delim:
- self._error("Invalid string closure token")
- try:
- return rv.decode('utf-8')
- except UnicodeDecodeError as exc:
- raise LLSDParseError(exc)
- def format_binary(something):
- """
- Format application/llsd+binary to a python object.
- See http://wiki.secondlife.com/wiki/LLSD#Binary_Serialization
- :param something: a python object (typically a dict) to be serialized.
- :returns: Returns a LLSD binary formatted string.
- """
- return b'<?llsd/binary?>\n' + _format_binary_recurse(something)
- def _format_binary_recurse(something):
- "Binary formatter workhorse."
- def _format_list(something):
- array_builder = []
- array_builder.append(b'[' + struct.pack('!i', len(something)))
- for item in something:
- array_builder.append(_format_binary_recurse(item))
- array_builder.append(b']')
- return b''.join(array_builder)
- if something is None:
- return b'!'
- elif isinstance(something, LLSD):
- return _format_binary_recurse(something.thing)
- elif isinstance(something, bool):
- if something:
- return b'1'
- else:
- return b'0'
- elif is_integer(something):
- try:
- return b'i' + struct.pack('!i', something)
- except (OverflowError, struct.error) as exc:
- raise LLSDSerializationError(str(exc), something)
- elif isinstance(something, float):
- try:
- return b'r' + struct.pack('!d', something)
- except SystemError as exc:
- raise LLSDSerializationError(str(exc), something)
- elif isinstance(something, uuid.UUID):
- return b'u' + something.bytes
- elif isinstance(something, binary):
- return b'b' + struct.pack('!i', len(something)) + something
- elif is_string(something):
- something = _str_to_bytes(something)
- return b's' + struct.pack('!i', len(something)) + something
- elif isinstance(something, uri):
- return b'l' + struct.pack('!i', len(something)) + something
- elif isinstance(something, datetime.datetime):
- seconds_since_epoch = calendar.timegm(something.utctimetuple()) \
- + something.microsecond // 1e6
- return b'd' + struct.pack('<d', seconds_since_epoch)
- elif isinstance(something, datetime.date):
- seconds_since_epoch = calendar.timegm(something.timetuple())
- return b'd' + struct.pack('<d', seconds_since_epoch)
- elif isinstance(something, (list, tuple)):
- return _format_list(something)
- elif isinstance(something, dict):
- map_builder = []
- map_builder.append(b'{' + struct.pack('!i', len(something)))
- for key, value in something.items():
- key = _str_to_bytes(key)
- map_builder.append(b'k' + struct.pack('!i', len(key)) + key)
- map_builder.append(_format_binary_recurse(value))
- map_builder.append(b'}')
- return b''.join(map_builder)
- else:
- try:
- return _format_list(list(something))
- except TypeError:
- raise LLSDSerializationError(
- "Cannot serialize unknown type: %s (%s)" %
- (type(something), something))
- def _startswith(startstr, something):
- if hasattr(something, 'startswith'):
- return something.startswith(startstr)
- else:
- pos = something.tell()
- s = something.read(len(startstr))
- something.seek(pos, os.SEEK_SET)
- return (s == startstr)
- def parse_binary(something):
- """
- This is the basic public interface for parsing llsd+binary.
- :param something: The data to parse in an indexable sequence.
- :returns: Returns a python object.
- """
- if _startswith(b'<?llsd/binary?>', something):
- just_binary = something.split(b'\n', 1)[1]
- else:
- just_binary = something
- return LLSDBinaryParser().parse(just_binary)
- declaration_regex = re.compile(br'^\s*(?:<\?[\x09\x0A\x0D\x20-\x7e]+\?>)|(?:<llsd>)')
- def validate_xml_declaration(something):
- if not declaration_regex.match(something):
- raise LLSDParseError("Invalid XML Declaration")
- def parse_xml(something):
- """
- This is the basic public interface for parsing llsd+xml.
- :param something: The data to parse.
- :returns: Returns a python object.
- """
- try:
- # validate xml declaration manually until http://bugs.python.org/issue7138 is fixed
- validate_xml_declaration(something)
- return _to_python(fromstring(something)[0])
- except ElementTreeError as err:
- raise LLSDParseError(*err.args)
- def parse_notation(something):
- """
- This is the basic public interface for parsing llsd+notation.
- :param something: The data to parse.
- :returns: Returns a python object.
- """
- return LLSDNotationParser().parse(something)
- def parse(something, mime_type = None):
- """
- This is the basic public interface for parsing llsd.
- :param something: The data to parse. This is expected to be bytes, not strings
- :param mime_type: The mime_type of the data if it is known.
- :returns: Returns a python object.
- Python 3 Note: when reading LLSD from a file, use open()'s 'rb' mode explicitly
- """
- if mime_type in (XML_MIME_TYPE, 'application/llsd'):
- return parse_xml(something)
- elif mime_type == BINARY_MIME_TYPE:
- return parse_binary(something)
- elif mime_type == NOTATION_MIME_TYPE:
- return parse_notation(something)
- #elif content_type == 'application/json':
- # return parse_notation(something)
- try:
- something = something.lstrip() #remove any pre-trailing whitespace
- if _startswith(b'<?llsd/binary?>', something):
- return parse_binary(something)
- # This should be better.
- elif _startswith(b'<', something):
- return parse_xml(something)
- else:
- return parse_notation(something)
- except KeyError as e:
- raise LLSDParseError('LLSD could not be parsed: %s' % (e,))
- except TypeError as e:
- raise LLSDParseError('Input stream not of type bytes. %s' % (e,))
- class LLSD(object):
- "Simple wrapper class for a thing."
- def __init__(self, thing=None):
- self.thing = thing
- def __bytes__(self):
- return self.as_xml(self.thing)
- def __str__(self):
- return self.__bytes__().decode()
- parse = staticmethod(parse)
- as_xml = staticmethod(format_xml)
- as_pretty_xml = staticmethod(format_pretty_xml)
- as_binary = staticmethod(format_binary)
- as_notation = staticmethod(format_notation)
- undef = LLSD(None)
|