llsd.py 39 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378
  1. # file llsd.py
  2. #
  3. # $LicenseInfo:firstyear=2006&license=mit$
  4. #
  5. # Copyright (c) 2006-2009, Linden Research, Inc.
  6. #
  7. # Permission is hereby granted, free of charge, to any person obtaining a copy
  8. # of this software and associated documentation files (the "Software"), to deal
  9. # in the Software without restriction, including without limitation the rights
  10. # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  11. # copies of the Software, and to permit persons to whom the Software is
  12. # furnished to do so, subject to the following conditions:
  13. #
  14. # The above copyright notice and this permission notice shall be included in
  15. # all copies or substantial portions of the Software.
  16. #
  17. # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18. # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19. # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  20. # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21. # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22. # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  23. # THE SOFTWARE.
  24. # $/LicenseInfo$
  25. """
  26. Types as well as parsing and formatting functions for handling LLSD.
  27. This is the llsd module -- parsers and formatters between the
  28. supported subset of mime types and python objects. Documentation
  29. available on the Second Life wiki:
  30. http://wiki.secondlife.com/wiki/LLSD
  31. """
  32. from __future__ import absolute_import
  33. from __future__ import division
  34. import sys
  35. import base64
  36. import binascii
  37. import calendar
  38. import datetime
  39. import re
  40. import struct
  41. import time
  42. import types
  43. import uuid
  44. import os
  45. from .fastest_elementtree import ElementTreeError, fromstring
  46. PY2 = sys.version_info[0] == 2
  47. XML_MIME_TYPE = 'application/llsd+xml'
  48. BINARY_MIME_TYPE = 'application/llsd+binary'
  49. NOTATION_MIME_TYPE = 'application/llsd+notation'
  50. class LLSDParseError(Exception):
  51. "Exception raised when the parser fails."
  52. pass
  53. class LLSDSerializationError(TypeError):
  54. "Exception raised when serialization fails."
  55. pass
  56. if PY2:
  57. class binary(str):
  58. "Simple wrapper for llsd.binary data."
  59. pass
  60. else:
  61. binary = bytes
  62. class uri(str):
  63. "Simple wrapper for llsd.uri data."
  64. pass
  65. # In Python 2, this expression produces (str, unicode); in Python 3 it's
  66. # simply (str,). Either way, it's valid to test isinstance(somevar,
  67. # StringTypes). (Some consumers test (type(somevar) in StringTypes), so we do
  68. # want (str,) rather than plain str.)
  69. StringTypes = tuple(set((type(''), type(u''))))
  70. try:
  71. LongType = long
  72. IntTypes = (int, long)
  73. except NameError:
  74. LongType = int
  75. IntTypes = int
  76. try:
  77. UnicodeType = unicode
  78. except NameError:
  79. UnicodeType = str
  80. # can't just check for NameError: 'bytes' is defined in both Python 2 and 3
  81. if PY2:
  82. BytesType = str
  83. else:
  84. BytesType = bytes
  85. try:
  86. b'%s' % (b'yes',)
  87. except TypeError:
  88. # There's a range of Python 3 versions, up through Python 3.4, for which
  89. # bytes interpolation (bytes value with % operator) does not work. This
  90. # hack can be removed once we no longer care about Python 3.4 -- in other
  91. # words, once we're beyond jessie everywhere.
  92. class B(object):
  93. """
  94. Instead of writing:
  95. b'format string' % stuff
  96. write:
  97. B('format string') % stuff
  98. This class performs the conversions necessary to support bytes
  99. interpolation when the language doesn't natively support it.
  100. (We considered naming this class b, but that would be too confusing.)
  101. """
  102. def __init__(self, fmt):
  103. # Instead of storing the format string as bytes and converting it
  104. # to string every time, convert initially and store the string.
  105. try:
  106. self.strfmt = fmt.decode('utf-8')
  107. except AttributeError:
  108. # caller passed a string literal rather than a bytes literal
  109. self.strfmt = fmt
  110. def __mod__(self, args):
  111. # __mod__() is engaged for (self % args)
  112. if not isinstance(args, tuple):
  113. # Unify the tuple and non-tuple cases.
  114. args = (args,)
  115. # In principle, this is simple: convert everything to string,
  116. # interpolate, convert back. It's complicated by the fact that we
  117. # must handle non-bytes args.
  118. strargs = []
  119. for arg in args:
  120. try:
  121. decoder = arg.decode
  122. except AttributeError:
  123. # use arg exactly as is
  124. strargs.append(arg)
  125. else:
  126. # convert from bytes to string
  127. strargs.append(decoder('utf-8'))
  128. return (self.strfmt % tuple(strargs)).encode('utf-8')
  129. else:
  130. # bytes interpolation Just Works
  131. def B(fmt):
  132. try:
  133. # In the usual case, caller wrote B('fmt') rather than b'fmt'. But
  134. # s/he really wants a bytes literal here. Encode the passed string.
  135. return fmt.encode('utf-8')
  136. except AttributeError:
  137. # Caller wrote B(b'fmt')?
  138. return fmt
  139. def is_integer(o):
  140. """ portable test if an object is like an int """
  141. return isinstance(o, IntTypes)
  142. def is_unicode(o):
  143. """ portable check if an object is unicode and not bytes """
  144. return isinstance(o, UnicodeType)
  145. def is_string(o):
  146. """ portable check if an object is string-like """
  147. return isinstance(o, StringTypes)
  148. def is_bytes(o):
  149. """ portable check if an object is an immutable byte array """
  150. return isinstance(o, BytesType)
  151. _int_regex = re.compile(br"[-+]?\d+")
  152. _real_regex = re.compile(br"[-+]?(?:(\d+(\.\d*)?|\d*\.\d+)([eE][-+]?\d+)?)|[-+]?inf|[-+]?nan")
  153. _alpha_regex = re.compile(br"[a-zA-Z]+")
  154. _true_regex = re.compile(br"TRUE|true|\b[Tt]\b")
  155. _false_regex = re.compile(br"FALSE|false|\b[Ff]\b")
  156. _date_regex = re.compile(r"(?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2})T"
  157. r"(?P<hour>\d{2}):(?P<minute>\d{2}):(?P<second>\d{2})"
  158. r"(?P<second_float>(\.\d+)?)Z")
  159. #date: d"YYYY-MM-DDTHH:MM:SS.FFFFFFZ"
  160. def _str_to_bytes(s):
  161. if is_unicode(s):
  162. return s.encode('utf-8')
  163. else:
  164. return s
  165. def _format_datestr(v):
  166. """
  167. Formats a datetime or date object into the string format shared by
  168. xml and notation serializations.
  169. """
  170. if not isinstance(v, datetime.date) and not isinstance(v, datetime.datetime):
  171. raise LLSDParseError("invalid date string %s passed to date formatter" % s)
  172. if not isinstance(v, datetime.datetime):
  173. v = datetime.datetime.combine(v, datetime.time(0))
  174. return _str_to_bytes(v.isoformat() + 'Z')
  175. def _parse_datestr(datestr):
  176. """
  177. Parses a datetime object from the string format shared by
  178. xml and notation serializations.
  179. """
  180. if datestr == "":
  181. return datetime.datetime(1970, 1, 1)
  182. match = re.match(_date_regex, datestr)
  183. if not match:
  184. raise LLSDParseError("invalid date string '%s'." % datestr)
  185. year = int(match.group('year'))
  186. month = int(match.group('month'))
  187. day = int(match.group('day'))
  188. hour = int(match.group('hour'))
  189. minute = int(match.group('minute'))
  190. second = int(match.group('second'))
  191. seconds_float = match.group('second_float')
  192. usec = 0
  193. if seconds_float:
  194. usec = int(float('0' + seconds_float) * 1e6)
  195. return datetime.datetime(year, month, day, hour, minute, second, usec)
  196. def _bool_to_python(node):
  197. "Convert boolean node to a python object."
  198. val = node.text or ''
  199. try:
  200. # string value, accept 'true' or 'True' or whatever
  201. return (val.lower() == 'true')
  202. except AttributeError:
  203. # not a string (no lower() method), use normal Python rules
  204. return bool(val)
  205. def _int_to_python(node):
  206. "Convert integer node to a python object."
  207. val = node.text or ''
  208. if not val.strip():
  209. return 0
  210. return int(val)
  211. def _real_to_python(node):
  212. "Convert floating point node to a python object."
  213. val = node.text or ''
  214. if not val.strip():
  215. return 0.0
  216. return float(val)
  217. def _uuid_to_python(node):
  218. "Convert uuid node to a python object."
  219. if node.text:
  220. return uuid.UUID(hex=node.text)
  221. return uuid.UUID(int=0)
  222. def _str_to_python(node):
  223. "Convert string node to a python object."
  224. return node.text or ''
  225. def _bin_to_python(node):
  226. base = node.get('encoding') or 'base64'
  227. try:
  228. if base == 'base16':
  229. # parse base16 encoded data
  230. return binary(base64.b16decode(node.text or ''))
  231. elif base == 'base64':
  232. # parse base64 encoded data
  233. return binary(base64.b64decode(node.text or ''))
  234. elif base == 'base85':
  235. return LLSDParseError("Parser doesn't support base85 encoding")
  236. except binascii.Error as exc:
  237. # convert exception class so it's more catchable
  238. return LLSDParseError("Encoded binary data: " + str(exc))
  239. except TypeError as exc:
  240. # convert exception class so it's more catchable
  241. return LLSDParseError("Bad binary data: " + str(exc))
  242. def _date_to_python(node):
  243. "Convert date node to a python object."
  244. val = node.text or ''
  245. if not val:
  246. val = "1970-01-01T00:00:00Z"
  247. return _parse_datestr(val)
  248. def _uri_to_python(node):
  249. "Convert uri node to a python object."
  250. val = node.text or ''
  251. return uri(val)
  252. def _map_to_python(node):
  253. "Convert map node to a python object."
  254. result = {}
  255. for index in range(len(node))[::2]:
  256. if node[index].text is None:
  257. result[''] = _to_python(node[index+1])
  258. else:
  259. result[node[index].text] = _to_python(node[index+1])
  260. return result
  261. def _array_to_python(node):
  262. "Convert array node to a python object."
  263. return [_to_python(child) for child in node]
  264. NODE_HANDLERS = dict(
  265. undef=lambda x: None,
  266. boolean=_bool_to_python,
  267. integer=_int_to_python,
  268. real=_real_to_python,
  269. uuid=_uuid_to_python,
  270. string=_str_to_python,
  271. binary=_bin_to_python,
  272. date=_date_to_python,
  273. uri=_uri_to_python,
  274. map=_map_to_python,
  275. array=_array_to_python,
  276. )
  277. def _to_python(node):
  278. "Convert node to a python object."
  279. return NODE_HANDLERS[node.tag](node)
  280. if PY2:
  281. ALL_CHARS = str(bytearray(range(256)))
  282. else:
  283. ALL_CHARS = bytes(range(256))
  284. INVALID_XML_BYTES = b'\x00\x01\x02\x03\x04\x05\x06\x07\x08\x0b\x0c'\
  285. b'\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18'\
  286. b'\x19\x1a\x1b\x1c\x1d\x1e\x1f'
  287. INVALID_XML_RE = re.compile(r'[\x00-\x08\x0b\x0c\x0e-\x1f]')
  288. def remove_invalid_xml_bytes(b):
  289. try:
  290. # Dropping chars that cannot be parsed later on. The
  291. # translate() function was benchmarked to be the fastest way
  292. # to do this.
  293. return b.translate(ALL_CHARS, INVALID_XML_BYTES)
  294. except TypeError:
  295. # we get here if s is a unicode object (should be limited to
  296. # unit tests)
  297. return INVALID_XML_RE.sub('', b)
  298. class LLSDBaseFormatter(object):
  299. """
  300. This base class cannot be instantiated on its own: it assumes a subclass
  301. containing methods with canonical names specified in self.__init__(). The
  302. role of this base class is to provide self.type_map based on the methods
  303. defined in its subclass.
  304. """
  305. def __init__(self):
  306. "Construct a new formatter dispatch table."
  307. self.type_map = {
  308. type(None) : self.UNDEF,
  309. bool : self.BOOLEAN,
  310. int : self.INTEGER,
  311. LongType : self.INTEGER,
  312. float : self.REAL,
  313. uuid.UUID : self.UUID,
  314. binary : self.BINARY,
  315. str : self.STRING,
  316. UnicodeType : self.STRING,
  317. uri : self.URI,
  318. datetime.datetime : self.DATE,
  319. datetime.date : self.DATE,
  320. list : self.ARRAY,
  321. tuple : self.ARRAY,
  322. types.GeneratorType : self.ARRAY,
  323. dict : self.MAP,
  324. LLSD : self.LLSD
  325. }
  326. class LLSDXMLFormatter(LLSDBaseFormatter):
  327. """
  328. Class which implements LLSD XML serialization..
  329. http://wiki.secondlife.com/wiki/LLSD#XML_Serialization
  330. This class wraps both a pure python and c-extension for formatting
  331. a limited subset of python objects as application/llsd+xml. You do
  332. not generally need to make an instance of this object since the
  333. module level format_xml is the most convenient interface to this
  334. functionality.
  335. """
  336. def _elt(self, name, contents=None):
  337. "Serialize a single element."
  338. if not contents:
  339. return B("<%s />") % (name,)
  340. else:
  341. return B("<%s>%s</%s>") % (name, _str_to_bytes(contents), name)
  342. def xml_esc(self, v):
  343. "Escape string or unicode object v for xml output"
  344. if is_string(v):
  345. # we need to drop these invalid characters because they
  346. # cannot be parsed (and encode() doesn't drop them for us)
  347. v = v.replace(u'\uffff', u'')
  348. v = v.replace(u'\ufffe', u'')
  349. v = v.encode('utf-8')
  350. v = remove_invalid_xml_bytes(v)
  351. return v.replace(b'&',b'&amp;').replace(b'<',b'&lt;').replace(b'>',b'&gt;')
  352. def LLSD(self, v):
  353. return self._generate(v.thing)
  354. def UNDEF(self, _v):
  355. return self._elt(b'undef')
  356. def BOOLEAN(self, v):
  357. if v:
  358. return self._elt(b'boolean', b'true')
  359. else:
  360. return self._elt(b'boolean', b'false')
  361. def INTEGER(self, v):
  362. return self._elt(b'integer', str(v))
  363. def REAL(self, v):
  364. return self._elt(b'real', repr(v))
  365. def UUID(self, v):
  366. if v.int == 0:
  367. return self._elt(b'uuid')
  368. else:
  369. return self._elt(b'uuid', str(v))
  370. def BINARY(self, v):
  371. return self._elt(b'binary', base64.b64encode(v).strip())
  372. def STRING(self, v):
  373. return self._elt(b'string', self.xml_esc(v))
  374. def URI(self, v):
  375. return self._elt(b'uri', self.xml_esc(str(v)))
  376. def DATE(self, v):
  377. return self._elt(b'date', _format_datestr(v))
  378. def ARRAY(self, v):
  379. return self._elt(
  380. b'array',
  381. b''.join([self._generate(item) for item in v]))
  382. def MAP(self, v):
  383. return self._elt(
  384. b'map',
  385. b''.join([B("%s%s") % (self._elt(b'key', self.xml_esc(UnicodeType(key))),
  386. self._generate(value))
  387. for key, value in v.items()]))
  388. typeof = type
  389. def _generate(self, something):
  390. "Generate xml from a single python object."
  391. t = self.typeof(something)
  392. if t in self.type_map:
  393. return self.type_map[t](something)
  394. else:
  395. raise LLSDSerializationError(
  396. "Cannot serialize unknown type: %s (%s)" % (t, something))
  397. def _format(self, something):
  398. "Pure Python implementation of the formatter."
  399. return b'<?xml version="1.0" ?>' + self._elt(b"llsd", self._generate(something))
  400. def format(self, something):
  401. """
  402. Format a python object as application/llsd+xml
  403. :param something: A python object (typically a dict) to be serialized.
  404. :returns: Returns an XML formatted string.
  405. """
  406. return self._format(something)
  407. _g_xml_formatter = None
  408. def format_xml(something):
  409. """
  410. Format a python object as application/llsd+xml
  411. :param something: a python object (typically a dict) to be serialized.
  412. :returns: Returns an XML formatted string.
  413. Ssee http://wiki.secondlife.com/wiki/LLSD#XML_Serialization
  414. This function wraps both a pure python and c-extension for formatting
  415. a limited subset of python objects as application/llsd+xml.
  416. """
  417. global _g_xml_formatter
  418. if _g_xml_formatter is None:
  419. _g_xml_formatter = LLSDXMLFormatter()
  420. return _g_xml_formatter.format(something)
  421. class LLSDXMLPrettyFormatter(LLSDXMLFormatter):
  422. """
  423. Class which implements 'pretty' LLSD XML serialization..
  424. See http://wiki.secondlife.com/wiki/LLSD#XML_Serialization
  425. The output conforms to the LLSD DTD, unlike the output from the
  426. standard python xml.dom DOM::toprettyxml() method which does not
  427. preserve significant whitespace.
  428. This class is not necessarily suited for serializing very large objects.
  429. It sorts on dict (llsd map) keys alphabetically to ease human reading.
  430. """
  431. def __init__(self, indent_atom = None):
  432. "Construct a pretty serializer."
  433. # Call the super class constructor so that we have the type map
  434. super(LLSDXMLPrettyFormatter, self).__init__()
  435. # Override the type map to use our specialized formatters to
  436. # emit the pretty output.
  437. self.type_map[list] = self.PRETTY_ARRAY
  438. self.type_map[tuple] = self.PRETTY_ARRAY
  439. self.type_map[types.GeneratorType] = self.PRETTY_ARRAY,
  440. self.type_map[dict] = self.PRETTY_MAP
  441. # Private data used for indentation.
  442. self._indent_level = 1
  443. if indent_atom is None:
  444. self._indent_atom = b' '
  445. else:
  446. self._indent_atom = indent_atom
  447. def _indent(self):
  448. "Return an indentation based on the atom and indentation level."
  449. return self._indent_atom * self._indent_level
  450. def PRETTY_ARRAY(self, v):
  451. "Recursively format an array with pretty turned on."
  452. rv = []
  453. rv.append(b'<array>\n')
  454. self._indent_level = self._indent_level + 1
  455. rv.extend([B("%s%s\n") %
  456. (self._indent(),
  457. self._generate(item))
  458. for item in v])
  459. self._indent_level = self._indent_level - 1
  460. rv.append(self._indent())
  461. rv.append(b'</array>')
  462. return b''.join(rv)
  463. def PRETTY_MAP(self, v):
  464. "Recursively format a map with pretty turned on."
  465. rv = []
  466. rv.append(b'<map>\n')
  467. self._indent_level = self._indent_level + 1
  468. # list of keys
  469. keys = list(v)
  470. keys.sort()
  471. rv.extend([B("%s%s\n%s%s\n") %
  472. (self._indent(),
  473. self._elt(b'key', UnicodeType(key)),
  474. self._indent(),
  475. self._generate(v[key]))
  476. for key in keys])
  477. self._indent_level = self._indent_level - 1
  478. rv.append(self._indent())
  479. rv.append(b'</map>')
  480. return b''.join(rv)
  481. def format(self, something):
  482. """
  483. Format a python object as application/llsd+xml
  484. :param something: a python object (typically a dict) to be serialized.
  485. :returns: Returns an XML formatted string.
  486. """
  487. data = []
  488. data.append(b'<?xml version="1.0" ?>\n<llsd>')
  489. data.append(self._generate(something))
  490. data.append(b'</llsd>\n')
  491. return b'\n'.join(data)
  492. def format_pretty_xml(something):
  493. """
  494. Serialize a python object as 'pretty' application/llsd+xml.
  495. :param something: a python object (typically a dict) to be serialized.
  496. :returns: Returns an XML formatted string.
  497. See http://wiki.secondlife.com/wiki/LLSD#XML_Serialization
  498. The output conforms to the LLSD DTD, unlike the output from the
  499. standard python xml.dom DOM::toprettyxml() method which does not
  500. preserve significant whitespace.
  501. This function is not necessarily suited for serializing very large
  502. objects. It sorts on dict (llsd map) keys alphabetically to ease human
  503. reading.
  504. """
  505. return LLSDXMLPrettyFormatter().format(something)
  506. class LLSDNotationFormatter(LLSDBaseFormatter):
  507. """
  508. Serialize a python object as application/llsd+notation
  509. See http://wiki.secondlife.com/wiki/LLSD#Notation_Serialization
  510. """
  511. def LLSD(self, v):
  512. return self._generate(v.thing)
  513. def UNDEF(self, v):
  514. return b'!'
  515. def BOOLEAN(self, v):
  516. if v:
  517. return b'true'
  518. else:
  519. return b'false'
  520. def INTEGER(self, v):
  521. return B("i%d") % v
  522. def REAL(self, v):
  523. return B("r%r") % v
  524. def UUID(self, v):
  525. # latin-1 is the byte-to-byte encoding, mapping \x00-\xFF ->
  526. # \u0000-\u00FF. It's also the fastest encoding, I believe, from
  527. # https://docs.python.org/3/library/codecs.html#encodings-and-unicode
  528. # UUID doesn't like the hex to be a bytes object, so I have to
  529. # convert it to a string. I chose latin-1 to exactly match the old
  530. # error behavior in case someone passes an invalid hex string, with
  531. # things other than 0-9a-fA-F, so that they will fail in the UUID
  532. # decode, rather than with a UnicodeError.
  533. return B("u%s") % str(v).encode('latin-1')
  534. def BINARY(self, v):
  535. return b'b64"' + base64.b64encode(v).strip() + b'"'
  536. def STRING(self, v):
  537. return B("'%s'") % _str_to_bytes(v).replace(b"\\", b"\\\\").replace(b"'", b"\\'")
  538. def URI(self, v):
  539. return B('l"%s"') % _str_to_bytes(v).replace(b"\\", b"\\\\").replace(b'"', b'\\"')
  540. def DATE(self, v):
  541. return B('d"%s"') % _format_datestr(v)
  542. def ARRAY(self, v):
  543. return B("[%s]") % b','.join([self._generate(item) for item in v])
  544. def MAP(self, v):
  545. return B("{%s}") % b','.join([B("'%s':%s") % (_str_to_bytes(UnicodeType(key)).replace(b"\\", b"\\\\").replace(b"'", b"\\'"), self._generate(value))
  546. for key, value in v.items()])
  547. def _generate(self, something):
  548. "Generate notation from a single python object."
  549. t = type(something)
  550. handler = self.type_map.get(t)
  551. if handler:
  552. return handler(something)
  553. else:
  554. try:
  555. return self.ARRAY(iter(something))
  556. except TypeError:
  557. raise LLSDSerializationError(
  558. "Cannot serialize unknown type: %s (%s)" % (t, something))
  559. def format(self, something):
  560. """
  561. Format a python object as application/llsd+notation
  562. :param something: a python object (typically a dict) to be serialized.
  563. :returns: Returns a LLSD notation formatted string.
  564. """
  565. return self._generate(something)
  566. def format_notation(something):
  567. """
  568. Format a python object as application/llsd+notation
  569. :param something: a python object (typically a dict) to be serialized.
  570. :returns: Returns a LLSD notation formatted string.
  571. See http://wiki.secondlife.com/wiki/LLSD#Notation_Serialization
  572. """
  573. return LLSDNotationFormatter().format(something)
  574. def _hex_as_nybble(hex):
  575. "Accepts a single hex character and returns a nybble."
  576. if (hex >= b'0') and (hex <= b'9'):
  577. return ord(hex) - ord(b'0')
  578. elif (hex >= b'a') and (hex <=b'f'):
  579. return 10 + ord(hex) - ord(b'a')
  580. elif (hex >= b'A') and (hex <=b'F'):
  581. return 10 + ord(hex) - ord(b'A')
  582. else:
  583. raise LLSDParseError('Invalid hex character: %s' % hex)
  584. class LLSDBaseParser(object):
  585. """
  586. Utility methods useful for parser subclasses.
  587. """
  588. def __init__(self):
  589. self._buffer = b''
  590. self._index = 0
  591. def _error(self, message, offset=0):
  592. try:
  593. byte = self._buffer[self._index+offset]
  594. except IndexError:
  595. byte = None
  596. raise LLSDParseError("%s at byte %d: %s" % (message, self._index+offset, byte))
  597. def _peek(self, num=1):
  598. if num < 0:
  599. # There aren't many ways this can happen. The likeliest is that
  600. # we've just read garbage length bytes from a binary input string.
  601. # We happen to know that lengths are encoded as 4 bytes, so back
  602. # off by 4 bytes to try to point the user at the right spot.
  603. self._error("Invalid length field %d" % num, -4)
  604. if self._index + num > len(self._buffer):
  605. self._error("Trying to read past end of buffer")
  606. return self._buffer[self._index:self._index + num]
  607. def _getc(self, num=1):
  608. chars = self._peek(num)
  609. self._index += num
  610. return chars
  611. # map char following escape char to corresponding character
  612. _escaped = {
  613. b'a': b'\a',
  614. b'b': b'\b',
  615. b'f': b'\f',
  616. b'n': b'\n',
  617. b'r': b'\r',
  618. b't': b'\t',
  619. b'v': b'\v',
  620. }
  621. def _parse_string_delim(self, delim):
  622. "Parse a delimited string."
  623. parts = bytearray()
  624. found_escape = False
  625. found_hex = False
  626. found_digit = False
  627. byte = 0
  628. while True:
  629. cc = self._getc()
  630. if found_escape:
  631. if found_hex:
  632. if found_digit:
  633. found_escape = False
  634. found_hex = False
  635. found_digit = False
  636. byte <<= 4
  637. byte |= _hex_as_nybble(cc)
  638. parts.append(byte)
  639. byte = 0
  640. else:
  641. found_digit = True
  642. byte = _hex_as_nybble(cc)
  643. elif cc == b'x':
  644. found_hex = True
  645. else:
  646. found_escape = False
  647. # escape char preceding anything other than the chars in
  648. # _escaped just results in that same char without the
  649. # escape char
  650. parts.extend(self._escaped.get(cc, cc))
  651. elif cc == b'\\':
  652. found_escape = True
  653. elif cc == delim:
  654. break
  655. else:
  656. parts.extend(cc)
  657. try:
  658. return parts.decode('utf-8')
  659. except UnicodeDecodeError as exc:
  660. self._error(exc)
  661. class LLSDBinaryParser(LLSDBaseParser):
  662. """
  663. Parse application/llsd+binary to a python object.
  664. See http://wiki.secondlife.com/wiki/LLSD#Binary_Serialization
  665. """
  666. def __init__(self):
  667. super(LLSDBinaryParser, self).__init__()
  668. # One way of dispatching based on the next character we see would be a
  669. # dict lookup, and indeed that's the best way to express it in source.
  670. _dispatch_dict = {
  671. b'{': self._parse_map,
  672. b'[': self._parse_array,
  673. b'!': lambda: None,
  674. b'0': lambda: False,
  675. b'1': lambda: True,
  676. # 'i' = integer
  677. b'i': lambda: struct.unpack("!i", self._getc(4))[0],
  678. # 'r' = real number
  679. b'r': lambda: struct.unpack("!d", self._getc(8))[0],
  680. # 'u' = uuid
  681. b'u': lambda: uuid.UUID(bytes=self._getc(16)),
  682. # 's' = string
  683. b's': self._parse_string,
  684. # delimited/escaped string
  685. b"'": lambda: self._parse_string_delim(b"'"),
  686. b'"': lambda: self._parse_string_delim(b'"'),
  687. # 'l' = uri
  688. b'l': lambda: uri(self._parse_string()),
  689. # 'd' = date in seconds since epoch
  690. b'd': self._parse_date,
  691. # 'b' = binary
  692. # *NOTE: if not self._keep_binary, maybe have a binary placeholder
  693. # which has the length.
  694. b'b': lambda: binary(self._parse_string_raw()) if self._keep_binary else None,
  695. }
  696. # But in fact it should be even faster to construct a list indexed by
  697. # ord(char). Start by filling it with the 'else' case. Use offset=-1
  698. # because by the time we perform this lookup, we've scanned past the
  699. # lookup char.
  700. self._dispatch = 256*[lambda: self._error("invalid binary token", -1)]
  701. # Now use the entries in _dispatch_dict to set the corresponding
  702. # entries in _dispatch.
  703. for c, func in _dispatch_dict.items():
  704. self._dispatch[ord(c)] = func
  705. def parse(self, buffer, ignore_binary = False):
  706. """
  707. This is the basic public interface for parsing.
  708. :param buffer: the binary data to parse in an indexable sequence.
  709. :param ignore_binary: parser throws away data in llsd binary nodes.
  710. :returns: returns a python object.
  711. """
  712. self._buffer = buffer
  713. self._index = 0
  714. self._keep_binary = not ignore_binary
  715. try:
  716. return self._parse()
  717. except struct.error as exc:
  718. self._error(exc)
  719. def _parse(self):
  720. "The actual parser which is called recursively when necessary."
  721. cc = self._getc()
  722. try:
  723. func = self._dispatch[ord(cc)]
  724. except IndexError:
  725. self._error("invalid binary token", -1)
  726. else:
  727. return func()
  728. def _parse_map(self):
  729. "Parse a single llsd map"
  730. rv = {}
  731. size = struct.unpack("!i", self._getc(4))[0]
  732. count = 0
  733. cc = self._getc()
  734. key = b''
  735. while (cc != b'}') and (count < size):
  736. if cc == b'k':
  737. key = self._parse_string()
  738. elif cc in (b"'", b'"'):
  739. key = self._parse_string_delim(cc)
  740. else:
  741. self._error("invalid map key", -1)
  742. value = self._parse()
  743. rv[key] = value
  744. count += 1
  745. cc = self._getc()
  746. if cc != b'}':
  747. self._error("invalid map close token")
  748. return rv
  749. def _parse_array(self):
  750. "Parse a single llsd array"
  751. rv = []
  752. size = struct.unpack("!i", self._getc(4))[0]
  753. count = 0
  754. cc = self._peek()
  755. while (cc != b']') and (count < size):
  756. rv.append(self._parse())
  757. count += 1
  758. cc = self._peek()
  759. if cc != b']':
  760. self._error("invalid array close token")
  761. self._index += 1
  762. return rv
  763. def _parse_string(self):
  764. try:
  765. return self._parse_string_raw().decode('utf-8')
  766. except UnicodeDecodeError as exc:
  767. self._error(exc)
  768. def _parse_string_raw(self):
  769. "Parse a string which has the leadings size indicator"
  770. try:
  771. size = struct.unpack("!i", self._getc(4))[0]
  772. except struct.error as exc:
  773. # convert exception class for client convenience
  774. self._error("struct " + str(exc))
  775. rv = self._getc(size)
  776. return rv
  777. def _parse_date(self):
  778. seconds = struct.unpack("<d", self._getc(8))[0]
  779. try:
  780. return datetime.datetime.utcfromtimestamp(seconds)
  781. except OverflowError as exc:
  782. # A garbage seconds value can cause utcfromtimestamp() to raise
  783. # OverflowError: timestamp out of range for platform time_t
  784. self._error(exc, -8)
  785. class LLSDNotationParser(LLSDBaseParser):
  786. """
  787. Parse LLSD notation.
  788. See http://wiki.secondlife.com/wiki/LLSD#Notation_Serialization
  789. * map: { string:object, string:object }
  790. * array: [ object, object, object ]
  791. * undef: !
  792. * boolean: true | false | 1 | 0 | T | F | t | f | TRUE | FALSE
  793. * integer: i####
  794. * real: r####
  795. * uuid: u####
  796. * string: "g\'day" | 'have a "nice" day' | s(size)"raw data"
  797. * uri: l"escaped"
  798. * date: d"YYYY-MM-DDTHH:MM:SS.FFZ"
  799. * binary: b##"ff3120ab1" | b(size)"raw data"
  800. """
  801. def __init__(self):
  802. super(LLSDNotationParser, self).__init__()
  803. # Like LLSDBinaryParser, we want to dispatch based on the current
  804. # character.
  805. _dispatch_dict = {
  806. # map
  807. b'{': self._parse_map,
  808. # array
  809. b'[': self._parse_array,
  810. # undefined -- have to eat the '!'
  811. b'!': lambda: self._skip_then(None),
  812. # false -- have to eat the '0'
  813. b'0': lambda: self._skip_then(False),
  814. # true -- have to eat the '1'
  815. b'1': lambda: self._skip_then(True),
  816. # false, must check for F|f|false|FALSE
  817. b'F': lambda: self._get_re("'false'", _false_regex, False),
  818. b'f': lambda: self._get_re("'false'", _false_regex, False),
  819. # true, must check for T|t|true|TRUE
  820. b'T': lambda: self._get_re("'true'", _true_regex, True),
  821. b't': lambda: self._get_re("'true'", _true_regex, True),
  822. # 'i' = integer
  823. b'i': self._parse_integer,
  824. # 'r' = real number
  825. b'r': self._parse_real,
  826. # 'u' = uuid
  827. b'u': self._parse_uuid,
  828. # string
  829. b"'": self._parse_string,
  830. b'"': self._parse_string,
  831. b's': self._parse_string,
  832. # 'l' = uri
  833. b'l': self._parse_uri,
  834. # 'd' = date in seconds since epoch
  835. b'd': self._parse_date,
  836. # 'b' = binary
  837. b'b': self._parse_binary,
  838. }
  839. # Like LLSDBinaryParser, construct a lookup list from this dict. Start
  840. # by filling with the 'else' case.
  841. self._dispatch = 256*[lambda: self._error("Invalid notation token")]
  842. # Then fill in specific entries based on the dict above.
  843. for c, func in _dispatch_dict.items():
  844. self._dispatch[ord(c)] = func
  845. def parse(self, buffer, ignore_binary = False):
  846. """
  847. This is the basic public interface for parsing.
  848. :param buffer: the notation string to parse.
  849. :param ignore_binary: parser throws away data in llsd binary nodes.
  850. :returns: returns a python object.
  851. """
  852. if buffer == b"":
  853. return False
  854. self._buffer = buffer
  855. self._index = 0
  856. return self._parse()
  857. def _get_until(self, delim):
  858. start = self._index
  859. end = self._buffer.find(delim, start)
  860. if end == -1:
  861. return None
  862. else:
  863. self._index = end + 1
  864. return self._buffer[start:end]
  865. def _skip_then(self, value):
  866. # We've already _peek()ed at the current character, which is how we
  867. # decided to call this method. Skip past it and return constant value.
  868. self._getc()
  869. return value
  870. def _get_re(self, desc, regex, override=None):
  871. match = re.match(regex, self._buffer[self._index:])
  872. if not match:
  873. self._error("Invalid %s token" % desc)
  874. else:
  875. self._index += match.end()
  876. return override if override is not None else match.group(0)
  877. def _parse(self):
  878. "The notation parser workhorse."
  879. cc = self._peek()
  880. try:
  881. func = self._dispatch[ord(cc)]
  882. except IndexError:
  883. # output error if the token was out of range
  884. self._error("Invalid notation token")
  885. else:
  886. return func()
  887. def _parse_binary(self):
  888. "parse a single binary object."
  889. self._getc() # eat the beginning 'b'
  890. cc = self._peek()
  891. if cc == b'(':
  892. # parse raw binary
  893. paren = self._getc()
  894. # grab the 'expected' size of the binary data
  895. size = self._get_until(b')')
  896. if size == None:
  897. self._error("Invalid binary size")
  898. size = int(size)
  899. # grab the opening quote
  900. q = self._getc()
  901. if q != b'"':
  902. self._error('Expected " to start binary value')
  903. # grab the data
  904. data = self._getc(size)
  905. # grab the closing quote
  906. q = self._getc()
  907. if q != b'"':
  908. self._error('Expected " to end binary value')
  909. return binary(data)
  910. else:
  911. # get the encoding base
  912. base = self._getc(2)
  913. try:
  914. decoder = {
  915. b'16': base64.b16decode,
  916. b'64': base64.b64decode,
  917. }[base]
  918. except KeyError:
  919. self._error("Parser doesn't support base %s encoding" %
  920. base.decode('latin-1'))
  921. # grab the double quote
  922. q = self._getc()
  923. if q != b'"':
  924. self._error('Expected " to start binary value')
  925. # grab the encoded data
  926. encoded = self._get_until(q)
  927. try:
  928. return binary(decoder(encoded or b''))
  929. except binascii.Error as exc:
  930. # convert exception class so it's more catchable
  931. self._error("Encoded binary data: " + str(exc))
  932. except TypeError as exc:
  933. # convert exception class so it's more catchable
  934. self._error("Bad binary data: " + str(exc))
  935. def _parse_map(self):
  936. """
  937. parse a single map
  938. map: { string:object, string:object }
  939. """
  940. rv = {}
  941. key = b''
  942. found_key = False
  943. self._getc() # eat the beginning '{'
  944. cc = self._peek()
  945. while (cc != b'}'):
  946. if cc is None:
  947. self._error("Unclosed map")
  948. if not found_key:
  949. if cc in (b"'", b'"', b's'):
  950. key = self._parse_string()
  951. found_key = True
  952. elif cc.isspace() or cc == b',':
  953. self._getc() # eat the character
  954. pass
  955. else:
  956. self._error("Invalid map key")
  957. elif cc.isspace():
  958. self._getc() # eat the space
  959. pass
  960. elif cc == b':':
  961. self._getc() # eat the ':'
  962. value = self._parse()
  963. rv[key] = value
  964. found_key = False
  965. else:
  966. self._error("missing separator")
  967. cc = self._peek()
  968. if self._getc() != b'}':
  969. self._error("Invalid map close token")
  970. return rv
  971. def _parse_array(self):
  972. """
  973. parse a single array.
  974. array: [ object, object, object ]
  975. """
  976. rv = []
  977. self._getc() # eat the beginning '['
  978. cc = self._peek()
  979. while (cc != b']'):
  980. if cc is None:
  981. self._error('Unclosed array')
  982. if cc.isspace() or cc == b',':
  983. self._getc()
  984. cc = self._peek()
  985. continue
  986. rv.append(self._parse())
  987. cc = self._peek()
  988. if self._getc() != b']':
  989. self._error("Invalid array close token")
  990. return rv
  991. def _parse_uuid(self):
  992. "Parse a uuid."
  993. self._getc() # eat the beginning 'u'
  994. # see comment on LLSDNotationFormatter.UUID() re use of latin-1
  995. return uuid.UUID(hex=self._getc(36).decode('latin-1'))
  996. def _parse_uri(self):
  997. "Parse a URI."
  998. self._getc() # eat the beginning 'l'
  999. return uri(self._parse_string())
  1000. def _parse_date(self):
  1001. "Parse a date."
  1002. self._getc() # eat the beginning 'd'
  1003. datestr = self._parse_string()
  1004. return _parse_datestr(datestr)
  1005. def _parse_real(self):
  1006. "Parse a floating point number."
  1007. self._getc() # eat the beginning 'r'
  1008. return float(self._get_re("real", _real_regex))
  1009. def _parse_integer(self):
  1010. "Parse an integer."
  1011. self._getc() # eat the beginning 'i'
  1012. return int(self._get_re("integer", _int_regex))
  1013. def _parse_string(self):
  1014. """
  1015. Parse a string
  1016. string: "g\'day" | 'have a "nice" day' | s(size)"raw data"
  1017. """
  1018. rv = ""
  1019. delim = self._peek()
  1020. if delim in (b"'", b'"'):
  1021. delim = self._getc() # eat the beginning delim
  1022. rv = self._parse_string_delim(delim)
  1023. elif delim == b's':
  1024. rv = self._parse_string_raw()
  1025. else:
  1026. self._error("invalid string token")
  1027. return rv
  1028. def _parse_string_raw(self):
  1029. """
  1030. Parse a sized specified string.
  1031. string: s(size)"raw data"
  1032. """
  1033. self._getc() # eat the beginning 's'
  1034. # Read the (size) portion.
  1035. cc = self._getc()
  1036. if cc != b'(':
  1037. self._error("Invalid string token")
  1038. size = self._get_until(b')')
  1039. if size == None:
  1040. self._error("Invalid string size")
  1041. size = int(size)
  1042. delim = self._getc()
  1043. if delim not in (b"'", b'"'):
  1044. self._error("Invalid string token")
  1045. rv = self._getc(size)
  1046. cc = self._getc()
  1047. if cc != delim:
  1048. self._error("Invalid string closure token")
  1049. try:
  1050. return rv.decode('utf-8')
  1051. except UnicodeDecodeError as exc:
  1052. raise LLSDParseError(exc)
  1053. def format_binary(something):
  1054. """
  1055. Format application/llsd+binary to a python object.
  1056. See http://wiki.secondlife.com/wiki/LLSD#Binary_Serialization
  1057. :param something: a python object (typically a dict) to be serialized.
  1058. :returns: Returns a LLSD binary formatted string.
  1059. """
  1060. return b'<?llsd/binary?>\n' + _format_binary_recurse(something)
  1061. def _format_binary_recurse(something):
  1062. "Binary formatter workhorse."
  1063. def _format_list(something):
  1064. array_builder = []
  1065. array_builder.append(b'[' + struct.pack('!i', len(something)))
  1066. for item in something:
  1067. array_builder.append(_format_binary_recurse(item))
  1068. array_builder.append(b']')
  1069. return b''.join(array_builder)
  1070. if something is None:
  1071. return b'!'
  1072. elif isinstance(something, LLSD):
  1073. return _format_binary_recurse(something.thing)
  1074. elif isinstance(something, bool):
  1075. if something:
  1076. return b'1'
  1077. else:
  1078. return b'0'
  1079. elif is_integer(something):
  1080. try:
  1081. return b'i' + struct.pack('!i', something)
  1082. except (OverflowError, struct.error) as exc:
  1083. raise LLSDSerializationError(str(exc), something)
  1084. elif isinstance(something, float):
  1085. try:
  1086. return b'r' + struct.pack('!d', something)
  1087. except SystemError as exc:
  1088. raise LLSDSerializationError(str(exc), something)
  1089. elif isinstance(something, uuid.UUID):
  1090. return b'u' + something.bytes
  1091. elif isinstance(something, binary):
  1092. return b'b' + struct.pack('!i', len(something)) + something
  1093. elif is_string(something):
  1094. something = _str_to_bytes(something)
  1095. return b's' + struct.pack('!i', len(something)) + something
  1096. elif isinstance(something, uri):
  1097. return b'l' + struct.pack('!i', len(something)) + something
  1098. elif isinstance(something, datetime.datetime):
  1099. seconds_since_epoch = calendar.timegm(something.utctimetuple()) \
  1100. + something.microsecond // 1e6
  1101. return b'd' + struct.pack('<d', seconds_since_epoch)
  1102. elif isinstance(something, datetime.date):
  1103. seconds_since_epoch = calendar.timegm(something.timetuple())
  1104. return b'd' + struct.pack('<d', seconds_since_epoch)
  1105. elif isinstance(something, (list, tuple)):
  1106. return _format_list(something)
  1107. elif isinstance(something, dict):
  1108. map_builder = []
  1109. map_builder.append(b'{' + struct.pack('!i', len(something)))
  1110. for key, value in something.items():
  1111. key = _str_to_bytes(key)
  1112. map_builder.append(b'k' + struct.pack('!i', len(key)) + key)
  1113. map_builder.append(_format_binary_recurse(value))
  1114. map_builder.append(b'}')
  1115. return b''.join(map_builder)
  1116. else:
  1117. try:
  1118. return _format_list(list(something))
  1119. except TypeError:
  1120. raise LLSDSerializationError(
  1121. "Cannot serialize unknown type: %s (%s)" %
  1122. (type(something), something))
  1123. def _startswith(startstr, something):
  1124. if hasattr(something, 'startswith'):
  1125. return something.startswith(startstr)
  1126. else:
  1127. pos = something.tell()
  1128. s = something.read(len(startstr))
  1129. something.seek(pos, os.SEEK_SET)
  1130. return (s == startstr)
  1131. def parse_binary(something):
  1132. """
  1133. This is the basic public interface for parsing llsd+binary.
  1134. :param something: The data to parse in an indexable sequence.
  1135. :returns: Returns a python object.
  1136. """
  1137. if _startswith(b'<?llsd/binary?>', something):
  1138. just_binary = something.split(b'\n', 1)[1]
  1139. else:
  1140. just_binary = something
  1141. return LLSDBinaryParser().parse(just_binary)
  1142. declaration_regex = re.compile(br'^\s*(?:<\?[\x09\x0A\x0D\x20-\x7e]+\?>)|(?:<llsd>)')
  1143. def validate_xml_declaration(something):
  1144. if not declaration_regex.match(something):
  1145. raise LLSDParseError("Invalid XML Declaration")
  1146. def parse_xml(something):
  1147. """
  1148. This is the basic public interface for parsing llsd+xml.
  1149. :param something: The data to parse.
  1150. :returns: Returns a python object.
  1151. """
  1152. try:
  1153. # validate xml declaration manually until http://bugs.python.org/issue7138 is fixed
  1154. validate_xml_declaration(something)
  1155. return _to_python(fromstring(something)[0])
  1156. except ElementTreeError as err:
  1157. raise LLSDParseError(*err.args)
  1158. def parse_notation(something):
  1159. """
  1160. This is the basic public interface for parsing llsd+notation.
  1161. :param something: The data to parse.
  1162. :returns: Returns a python object.
  1163. """
  1164. return LLSDNotationParser().parse(something)
  1165. def parse(something, mime_type = None):
  1166. """
  1167. This is the basic public interface for parsing llsd.
  1168. :param something: The data to parse. This is expected to be bytes, not strings
  1169. :param mime_type: The mime_type of the data if it is known.
  1170. :returns: Returns a python object.
  1171. Python 3 Note: when reading LLSD from a file, use open()'s 'rb' mode explicitly
  1172. """
  1173. if mime_type in (XML_MIME_TYPE, 'application/llsd'):
  1174. return parse_xml(something)
  1175. elif mime_type == BINARY_MIME_TYPE:
  1176. return parse_binary(something)
  1177. elif mime_type == NOTATION_MIME_TYPE:
  1178. return parse_notation(something)
  1179. #elif content_type == 'application/json':
  1180. # return parse_notation(something)
  1181. try:
  1182. something = something.lstrip() #remove any pre-trailing whitespace
  1183. if _startswith(b'<?llsd/binary?>', something):
  1184. return parse_binary(something)
  1185. # This should be better.
  1186. elif _startswith(b'<', something):
  1187. return parse_xml(something)
  1188. else:
  1189. return parse_notation(something)
  1190. except KeyError as e:
  1191. raise LLSDParseError('LLSD could not be parsed: %s' % (e,))
  1192. except TypeError as e:
  1193. raise LLSDParseError('Input stream not of type bytes. %s' % (e,))
  1194. class LLSD(object):
  1195. "Simple wrapper class for a thing."
  1196. def __init__(self, thing=None):
  1197. self.thing = thing
  1198. def __bytes__(self):
  1199. return self.as_xml(self.thing)
  1200. def __str__(self):
  1201. return self.__bytes__().decode()
  1202. parse = staticmethod(parse)
  1203. as_xml = staticmethod(format_xml)
  1204. as_pretty_xml = staticmethod(format_pretty_xml)
  1205. as_binary = staticmethod(format_binary)
  1206. as_notation = staticmethod(format_notation)
# Module-level LLSD instance wrapping None.
undef = LLSD(None)