12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403 |
- """Create portable serialized representations of Python objects.
- See module cPickle for a (much) faster implementation.
- See module copy_reg for a mechanism for registering custom picklers.
- See module pickletools source for extensive comments.
- Classes:
- Pickler
- Unpickler
- Functions:
- dump(object, file)
- dumps(object) -> string
- load(file) -> object
- loads(string) -> object
- Misc variables:
- __version__
- format_version
- compatible_formats
- """
- __version__ = "$Revision: 36861 $" # Code version
- from types import *
- from copy_reg import dispatch_table
- from copy_reg import _extension_registry, _inverted_registry, _extension_cache
- import marshal
- import sys
- import struct
- import re
- import warnings
- __all__ = ["PickleError", "PicklingError", "UnpicklingError", "Pickler",
- "Unpickler", "dump", "dumps", "load", "loads"]
- # These are purely informational; no code uses these.
- format_version = "2.0" # File format version we write
- compatible_formats = ["1.0", # Original protocol 0
- "1.1", # Protocol 0 with INST added
- "1.2", # Original protocol 1
- "1.3", # Protocol 1 with BINFLOAT added
- "2.0", # Protocol 2
- ] # Old format versions we can read
- # Keep in synch with cPickle. This is the highest protocol number we
- # know how to read.
- HIGHEST_PROTOCOL = 2
- # Why use struct.pack() for pickling but marshal.loads() for
- # unpickling? struct.pack() is 40% faster than marshal.dumps(), but
- # marshal.loads() is twice as fast as struct.unpack()!
- mloads = marshal.loads
- class PickleError(Exception):
- """A common base class for the other pickling exceptions."""
- pass
- class PicklingError(PickleError):
- """This exception is raised when an unpicklable object is passed to the
- dump() method.
- """
- pass
- class UnpicklingError(PickleError):
- """This exception is raised when there is a problem unpickling an object,
- such as a security violation.
- Note that other exceptions may also be raised during unpickling, including
- (but not necessarily limited to) AttributeError, EOFError, ImportError,
- and IndexError.
- """
- pass
- # An instance of _Stop is raised by Unpickler.load_stop() in response to
- # the STOP opcode, passing the object that is the result of unpickling.
- class _Stop(Exception):
- def __init__(self, value):
- self.value = value
- # Jython has PyStringMap; it's a dict subclass with string keys
- try:
- from org.python.core import PyStringMap
- except ImportError:
- PyStringMap = None
- # UnicodeType may or may not be exported (normally imported from types)
- try:
- UnicodeType
- except NameError:
- UnicodeType = None
- # Pickle opcodes. See pickletools.py for extensive docs. The listing
- # here is in kind-of alphabetical order of 1-character pickle code.
- # pickletools groups them by purpose.
- MARK = '(' # push special markobject on stack
- STOP = '.' # every pickle ends with STOP
- POP = '0' # discard topmost stack item
- POP_MARK = '1' # discard stack top through topmost markobject
- DUP = '2' # duplicate top stack item
- FLOAT = 'F' # push float object; decimal string argument
- INT = 'I' # push integer or bool; decimal string argument
- BININT = 'J' # push four-byte signed int
- BININT1 = 'K' # push 1-byte unsigned int
- LONG = 'L' # push long; decimal string argument
- BININT2 = 'M' # push 2-byte unsigned int
- NONE = 'N' # push None
- PERSID = 'P' # push persistent object; id is taken from string arg
- BINPERSID = 'Q' # " " " ; " " " " stack
- REDUCE = 'R' # apply callable to argtuple, both on stack
- STRING = 'S' # push string; NL-terminated string argument
- BINSTRING = 'T' # push string; counted binary string argument
- SHORT_BINSTRING = 'U' # " " ; " " " " < 256 bytes
- UNICODE = 'V' # push Unicode string; raw-unicode-escaped'd argument
- BINUNICODE = 'X' # " " " ; counted UTF-8 string argument
- APPEND = 'a' # append stack top to list below it
- BUILD = 'b' # call __setstate__ or __dict__.update()
- GLOBAL = 'c' # push self.find_class(modname, name); 2 string args
- DICT = 'd' # build a dict from stack items
- EMPTY_DICT = '}' # push empty dict
- APPENDS = 'e' # extend list on stack by topmost stack slice
- GET = 'g' # push item from memo on stack; index is string arg
- BINGET = 'h' # " " " " " " ; " " 1-byte arg
- INST = 'i' # build & push class instance
- LONG_BINGET = 'j' # push item from memo on stack; index is 4-byte arg
- LIST = 'l' # build list from topmost stack items
- EMPTY_LIST = ']' # push empty list
- OBJ = 'o' # build & push class instance
- PUT = 'p' # store stack top in memo; index is string arg
- BINPUT = 'q' # " " " " " ; " " 1-byte arg
- LONG_BINPUT = 'r' # " " " " " ; " " 4-byte arg
- SETITEM = 's' # add key+value pair to dict
- TUPLE = 't' # build tuple from topmost stack items
- EMPTY_TUPLE = ')' # push empty tuple
- SETITEMS = 'u' # modify dict by adding topmost key+value pairs
- BINFLOAT = 'G' # push float; arg is 8-byte float encoding
- TRUE = 'I01\n' # not an opcode; see INT docs in pickletools.py
- FALSE = 'I00\n' # not an opcode; see INT docs in pickletools.py
- # Protocol 2
- PROTO = '\x80' # identify pickle protocol
- NEWOBJ = '\x81' # build object by applying cls.__new__ to argtuple
- EXT1 = '\x82' # push object from extension registry; 1-byte index
- EXT2 = '\x83' # ditto, but 2-byte index
- EXT4 = '\x84' # ditto, but 4-byte index
- TUPLE1 = '\x85' # build 1-tuple from stack top
- TUPLE2 = '\x86' # build 2-tuple from two topmost stack items
- TUPLE3 = '\x87' # build 3-tuple from three topmost stack items
- NEWTRUE = '\x88' # push True
- NEWFALSE = '\x89' # push False
- LONG1 = '\x8a' # push long from < 256 bytes
- LONG4 = '\x8b' # push really big long
- _tuplesize2code = [EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3]
- __all__.extend([x for x in dir() if re.match("[A-Z][A-Z0-9_]+$",x)])
- del x
- # Pickling machinery
- class Pickler:
- def __init__(self, file, protocol=None, bin=None):
- """This takes a file-like object for writing a pickle data stream.
- The optional protocol argument tells the pickler to use the
- given protocol; supported protocols are 0, 1, 2. The default
- protocol is 0, to be backwards compatible. (Protocol 0 is the
- only protocol that can be written to a file opened in text
- mode and read back successfully. When using a protocol higher
- than 0, make sure the file is opened in binary mode, both when
- pickling and unpickling.)
- Protocol 1 is more efficient than protocol 0; protocol 2 is
- more efficient than protocol 1.
- Specifying a negative protocol version selects the highest
- protocol version supported. The higher the protocol used, the
- more recent the version of Python needed to read the pickle
- produced.
- The file parameter must have a write() method that accepts a single
- string argument. It can thus be an open file object, a StringIO
- object, or any other custom object that meets this interface.
- """
- if protocol is not None and bin is not None:
- raise ValueError, "can't specify both 'protocol' and 'bin'"
- if bin is not None:
- warnings.warn("The 'bin' argument to Pickler() is deprecated",
- DeprecationWarning)
- protocol = bin
- if protocol is None:
- protocol = 0
- if protocol < 0:
- protocol = HIGHEST_PROTOCOL
- elif not 0 <= protocol <= HIGHEST_PROTOCOL:
- raise ValueError("pickle protocol must be <= %d" % HIGHEST_PROTOCOL)
- self.write = file.write
- self.memo = {}
- self.proto = int(protocol)
- self.bin = protocol >= 1
- self.fast = 0
- def clear_memo(self):
- """Clears the pickler's "memo".
- The memo is the data structure that remembers which objects the
- pickler has already seen, so that shared or recursive objects are
- pickled by reference and not by value. This method is useful when
- re-using picklers.
- """
- self.memo.clear()
- def dump(self, obj):
- """Write a pickled representation of obj to the open file."""
- if self.proto >= 2:
- self.write(PROTO + chr(self.proto))
- self.save(obj)
- self.write(STOP)
- def memoize(self, obj):
- """Store an object in the memo."""
- # The Pickler memo is a dictionary mapping object ids to 2-tuples
- # that contain the Unpickler memo key and the object being memoized.
- # The memo key is written to the pickle and will become
- # the key in the Unpickler's memo. The object is stored in the
- # Pickler memo so that transient objects are kept alive during
- # pickling.
- # The use of the Unpickler memo length as the memo key is just a
- # convention. The only requirement is that the memo values be unique.
- # But there appears no advantage to any other scheme, and this
- # scheme allows the Unpickler memo to be implemented as a plain (but
- # growable) array, indexed by memo key.
- if self.fast:
- return
- assert id(obj) not in self.memo
- memo_len = len(self.memo)
- self.write(self.put(memo_len))
- self.memo[id(obj)] = memo_len, obj
- # Return a PUT (BINPUT, LONG_BINPUT) opcode string, with argument i.
- def put(self, i, pack=struct.pack):
- if self.bin:
- if i < 256:
- return BINPUT + chr(i)
- else:
- return LONG_BINPUT + pack("<i", i)
- return PUT + repr(i) + '\n'
- # Return a GET (BINGET, LONG_BINGET) opcode string, with argument i.
- def get(self, i, pack=struct.pack):
- if self.bin:
- if i < 256:
- return BINGET + chr(i)
- else:
- return LONG_BINGET + pack("<i", i)
- return GET + repr(i) + '\n'
- def save(self, obj):
- # Check for persistent id (defined by a subclass)
- pid = self.persistent_id(obj)
- if pid:
- self.save_pers(pid)
- return
- # Check the memo
- x = self.memo.get(id(obj))
- if x:
- self.write(self.get(x[0]))
- return
- # Check the type dispatch table
- t = type(obj)
- f = self.dispatch.get(t)
- if f:
- f(self, obj) # Call unbound method with explicit self
- return
- # Check for a class with a custom metaclass; treat as regular class
- try:
- issc = issubclass(t, TypeType)
- except TypeError: # t is not a class (old Boost; see SF #502085)
- issc = 0
- if issc:
- self.save_global(obj)
- return
- # Check copy_reg.dispatch_table
- reduce = dispatch_table.get(t)
- if reduce:
- rv = reduce(obj)
- else:
- # Check for a __reduce_ex__ method, fall back to __reduce__
- reduce = getattr(obj, "__reduce_ex__", None)
- if reduce:
- rv = reduce(self.proto)
- else:
- reduce = getattr(obj, "__reduce__", None)
- if reduce:
- rv = reduce()
- else:
- raise PicklingError("Can't pickle %r object: %r" %
- (t.__name__, obj))
- # Check for string returned by reduce(), meaning "save as global"
- if type(rv) is StringType:
- self.save_global(obj, rv)
- return
- # Assert that reduce() returned a tuple
- if type(rv) is not TupleType:
- raise PicklingError("%s must return string or tuple" % reduce)
- # Assert that it returned an appropriately sized tuple
- l = len(rv)
- if not (2 <= l <= 5):
- raise PicklingError("Tuple returned by %s must have "
- "two to five elements" % reduce)
- # Save the reduce() output and finally memoize the object
- self.save_reduce(obj=obj, *rv)
- def persistent_id(self, obj):
- # This exists so a subclass can override it
- return None
- def save_pers(self, pid):
- # Save a persistent id reference
- if self.bin:
- self.save(pid)
- self.write(BINPERSID)
- else:
- self.write(PERSID + str(pid) + '\n')
- def save_reduce(self, func, args, state=None,
- listitems=None, dictitems=None, obj=None):
- # This API is called by some subclasses
- # Assert that args is a tuple or None
- if not isinstance(args, TupleType):
- if args is None:
- # A hack for Jim Fulton's ExtensionClass, now deprecated.
- # See load_reduce()
- warnings.warn("__basicnew__ special case is deprecated",
- DeprecationWarning)
- else:
- raise PicklingError(
- "args from reduce() should be a tuple")
- # Assert that func is callable
- if not callable(func):
- raise PicklingError("func from reduce should be callable")
- save = self.save
- write = self.write
- # Protocol 2 special case: if func's name is __newobj__, use NEWOBJ
- if self.proto >= 2 and getattr(func, "__name__", "") == "__newobj__":
- # A __reduce__ implementation can direct protocol 2 to
- # use the more efficient NEWOBJ opcode, while still
- # allowing protocol 0 and 1 to work normally. For this to
- # work, the function returned by __reduce__ should be
- # called __newobj__, and its first argument should be a
- # new-style class. The implementation for __newobj__
- # should be as follows, although pickle has no way to
- # verify this:
- #
- # def __newobj__(cls, *args):
- # return cls.__new__(cls, *args)
- #
- # Protocols 0 and 1 will pickle a reference to __newobj__,
- # while protocol 2 (and above) will pickle a reference to
- # cls, the remaining args tuple, and the NEWOBJ code,
- # which calls cls.__new__(cls, *args) at unpickling time
- # (see load_newobj below). If __reduce__ returns a
- # three-tuple, the state from the third tuple item will be
- # pickled regardless of the protocol, calling __setstate__
- # at unpickling time (see load_build below).
- #
- # Note that no standard __newobj__ implementation exists;
- # you have to provide your own. This is to enforce
- # compatibility with Python 2.2 (pickles written using
- # protocol 0 or 1 in Python 2.3 should be unpicklable by
- # Python 2.2).
- cls = args[0]
- if not hasattr(cls, "__new__"):
- raise PicklingError(
- "args[0] from __newobj__ args has no __new__")
- if obj is not None and cls is not obj.__class__:
- raise PicklingError(
- "args[0] from __newobj__ args has the wrong class")
- args = args[1:]
- save(cls)
- save(args)
- write(NEWOBJ)
- else:
- save(func)
- save(args)
- write(REDUCE)
- if obj is not None:
- self.memoize(obj)
- # More new special cases (that work with older protocols as
- # well): when __reduce__ returns a tuple with 4 or 5 items,
- # the 4th and 5th item should be iterators that provide list
- # items and dict items (as (key, value) tuples), or None.
- if listitems is not None:
- self._batch_appends(listitems)
- if dictitems is not None:
- self._batch_setitems(dictitems)
- if state is not None:
- save(state)
- write(BUILD)
- # Methods below this point are dispatched through the dispatch table
- dispatch = {}
- def save_none(self, obj):
- self.write(NONE)
- dispatch[NoneType] = save_none
- def save_bool(self, obj):
- if self.proto >= 2:
- self.write(obj and NEWTRUE or NEWFALSE)
- else:
- self.write(obj and TRUE or FALSE)
- dispatch[bool] = save_bool
- def save_int(self, obj, pack=struct.pack):
- if self.bin:
- # If the int is small enough to fit in a signed 4-byte 2's-comp
- # format, we can store it more efficiently than the general
- # case.
- # First one- and two-byte unsigned ints:
- if obj >= 0:
- if obj <= 0xff:
- self.write(BININT1 + chr(obj))
- return
- if obj <= 0xffff:
- self.write("%c%c%c" % (BININT2, obj&0xff, obj>>8))
- return
- # Next check for 4-byte signed ints:
- high_bits = obj >> 31 # note that Python shift sign-extends
- if high_bits == 0 or high_bits == -1:
- # All high bits are copies of bit 2**31, so the value
- # fits in a 4-byte signed int.
- self.write(BININT + pack("<i", obj))
- return
- # Text pickle, or int too big to fit in signed 4-byte format.
- self.write(INT + repr(obj) + '\n')
- dispatch[IntType] = save_int
- def save_long(self, obj, pack=struct.pack):
- if self.proto >= 2:
- bytes = encode_long(obj)
- n = len(bytes)
- if n < 256:
- self.write(LONG1 + chr(n) + bytes)
- else:
- self.write(LONG4 + pack("<i", n) + bytes)
- return
- self.write(LONG + repr(obj) + '\n')
- dispatch[LongType] = save_long
- def save_float(self, obj, pack=struct.pack):
- if self.bin:
- self.write(BINFLOAT + pack('>d', obj))
- else:
- self.write(FLOAT + repr(obj) + '\n')
- dispatch[FloatType] = save_float
- def save_string(self, obj, pack=struct.pack):
- if self.bin:
- n = len(obj)
- if n < 256:
- self.write(SHORT_BINSTRING + chr(n) + obj)
- else:
- self.write(BINSTRING + pack("<i", n) + obj)
- else:
- self.write(STRING + repr(obj) + '\n')
- self.memoize(obj)
- dispatch[StringType] = save_string
- def save_unicode(self, obj, pack=struct.pack):
- if self.bin:
- encoding = obj.encode('utf-8')
- n = len(encoding)
- self.write(BINUNICODE + pack("<i", n) + encoding)
- else:
- obj = obj.replace("\\", "\\u005c")
- obj = obj.replace("\n", "\\u000a")
- self.write(UNICODE + obj.encode('raw-unicode-escape') + '\n')
- self.memoize(obj)
- dispatch[UnicodeType] = save_unicode
- if StringType == UnicodeType:
- # This is true for Jython
- def save_string(self, obj, pack=struct.pack):
- unicode = obj.isunicode()
- if self.bin:
- if unicode:
- obj = obj.encode("utf-8")
- l = len(obj)
- if l < 256 and not unicode:
- self.write(SHORT_BINSTRING + chr(l) + obj)
- else:
- s = pack("<i", l)
- if unicode:
- self.write(BINUNICODE + s + obj)
- else:
- self.write(BINSTRING + s + obj)
- else:
- if unicode:
- obj = obj.replace("\\", "\\u005c")
- obj = obj.replace("\n", "\\u000a")
- obj = obj.encode('raw-unicode-escape')
- self.write(UNICODE + obj + '\n')
- else:
- self.write(STRING + repr(obj) + '\n')
- self.memoize(obj)
- dispatch[StringType] = save_string
- def save_tuple(self, obj):
- write = self.write
- proto = self.proto
- n = len(obj)
- if n == 0:
- if proto:
- write(EMPTY_TUPLE)
- else:
- write(MARK + TUPLE)
- return
- save = self.save
- memo = self.memo
- if n <= 3 and proto >= 2:
- for element in obj:
- save(element)
- # Subtle. Same as in the big comment below.
- if id(obj) in memo:
- get = self.get(memo[id(obj)][0])
- write(POP * n + get)
- else:
- write(_tuplesize2code[n])
- self.memoize(obj)
- return
- # proto 0 or proto 1 and tuple isn't empty, or proto > 1 and tuple
- # has more than 3 elements.
- write(MARK)
- for element in obj:
- save(element)
- if id(obj) in memo:
- # Subtle. d was not in memo when we entered save_tuple(), so
- # the process of saving the tuple's elements must have saved
- # the tuple itself: the tuple is recursive. The proper action
- # now is to throw away everything we put on the stack, and
- # simply GET the tuple (it's already constructed). This check
- # could have been done in the "for element" loop instead, but
- # recursive tuples are a rare thing.
- get = self.get(memo[id(obj)][0])
- if proto:
- write(POP_MARK + get)
- else: # proto 0 -- POP_MARK not available
- write(POP * (n+1) + get)
- return
- # No recursion.
- self.write(TUPLE)
- self.memoize(obj)
- dispatch[TupleType] = save_tuple
- # save_empty_tuple() isn't used by anything in Python 2.3. However, I
- # found a Pickler subclass in Zope3 that calls it, so it's not harmless
- # to remove it.
- def save_empty_tuple(self, obj):
- self.write(EMPTY_TUPLE)
- def save_list(self, obj):
- write = self.write
- if self.bin:
- write(EMPTY_LIST)
- else: # proto 0 -- can't use EMPTY_LIST
- write(MARK + LIST)
- self.memoize(obj)
- self._batch_appends(iter(obj))
- dispatch[ListType] = save_list
- # Keep in synch with cPickle's BATCHSIZE. Nothing will break if it gets
- # out of synch, though.
- _BATCHSIZE = 1000
- def _batch_appends(self, items):
- # Helper to batch up APPENDS sequences
- save = self.save
- write = self.write
- if not self.bin:
- for x in items:
- save(x)
- write(APPEND)
- return
- r = xrange(self._BATCHSIZE)
- while items is not None:
- tmp = []
- for i in r:
- try:
- x = items.next()
- tmp.append(x)
- except StopIteration:
- items = None
- break
- n = len(tmp)
- if n > 1:
- write(MARK)
- for x in tmp:
- save(x)
- write(APPENDS)
- elif n:
- save(tmp[0])
- write(APPEND)
- # else tmp is empty, and we're done
- def save_dict(self, obj):
- write = self.write
- if self.bin:
- write(EMPTY_DICT)
- else: # proto 0 -- can't use EMPTY_DICT
- write(MARK + DICT)
- self.memoize(obj)
- self._batch_setitems(obj.iteritems())
- dispatch[DictionaryType] = save_dict
- if not PyStringMap is None:
- dispatch[PyStringMap] = save_dict
- def _batch_setitems(self, items):
- # Helper to batch up SETITEMS sequences; proto >= 1 only
- save = self.save
- write = self.write
- if not self.bin:
- for k, v in items:
- save(k)
- save(v)
- write(SETITEM)
- return
- r = xrange(self._BATCHSIZE)
- while items is not None:
- tmp = []
- for i in r:
- try:
- tmp.append(items.next())
- except StopIteration:
- items = None
- break
- n = len(tmp)
- if n > 1:
- write(MARK)
- for k, v in tmp:
- save(k)
- save(v)
- write(SETITEMS)
- elif n:
- k, v = tmp[0]
- save(k)
- save(v)
- write(SETITEM)
- # else tmp is empty, and we're done
- def save_inst(self, obj):
- cls = obj.__class__
- memo = self.memo
- write = self.write
- save = self.save
- if hasattr(obj, '__getinitargs__'):
- args = obj.__getinitargs__()
- len(args) # XXX Assert it's a sequence
- _keep_alive(args, memo)
- else:
- args = ()
- write(MARK)
- if self.bin:
- save(cls)
- for arg in args:
- save(arg)
- write(OBJ)
- else:
- for arg in args:
- save(arg)
- write(INST + cls.__module__ + '\n' + cls.__name__ + '\n')
- self.memoize(obj)
- try:
- getstate = obj.__getstate__
- except AttributeError:
- stuff = obj.__dict__
- else:
- stuff = getstate()
- _keep_alive(stuff, memo)
- save(stuff)
- write(BUILD)
- dispatch[InstanceType] = save_inst
- def save_global(self, obj, name=None, pack=struct.pack):
- write = self.write
- memo = self.memo
- if name is None:
- name = obj.__name__
- module = getattr(obj, "__module__", None)
- if module is None:
- module = whichmodule(obj, name)
- try:
- __import__(module)
- mod = sys.modules[module]
- klass = getattr(mod, name)
- except (ImportError, KeyError, AttributeError):
- raise PicklingError(
- "Can't pickle %r: it's not found as %s.%s" %
- (obj, module, name))
- else:
- if klass is not obj:
- raise PicklingError(
- "Can't pickle %r: it's not the same object as %s.%s" %
- (obj, module, name))
- if self.proto >= 2:
- code = _extension_registry.get((module, name))
- if code:
- assert code > 0
- if code <= 0xff:
- write(EXT1 + chr(code))
- elif code <= 0xffff:
- write("%c%c%c" % (EXT2, code&0xff, code>>8))
- else:
- write(EXT4 + pack("<i", code))
- return
- write(GLOBAL + module + '\n' + name + '\n')
- self.memoize(obj)
- dispatch[ClassType] = save_global
- dispatch[FunctionType] = save_global
- dispatch[BuiltinFunctionType] = save_global
- dispatch[TypeType] = save_global
- # Pickling helpers
- def _keep_alive(x, memo):
- """Keeps a reference to the object x in the memo.
- Because we remember objects by their id, we have
- to assure that possibly temporary objects are kept
- alive by referencing them.
- We store a reference at the id of the memo, which should
- normally not be used unless someone tries to deepcopy
- the memo itself...
- """
- try:
- memo[id(memo)].append(x)
- except KeyError:
- # aha, this is the first one :-)
- memo[id(memo)]=[x]
- # A cache for whichmodule(), mapping a function object to the name of
- # the module in which the function was found.
- classmap = {} # called classmap for backwards compatibility
- def whichmodule(func, funcname):
- """Figure out the module in which a function occurs.
- Search sys.modules for the module.
- Cache in classmap.
- Return a module name.
- If the function cannot be found, return "__main__".
- """
- # Python functions should always get an __module__ from their globals.
- mod = getattr(func, "__module__", None)
- if mod is not None:
- return mod
- if func in classmap:
- return classmap[func]
- for name, module in sys.modules.items():
- if module is None:
- continue # skip dummy package entries
- if name != '__main__' and getattr(module, funcname, None) is func:
- break
- else:
- name = '__main__'
- classmap[func] = name
- return name
- # Unpickling machinery
- class Unpickler:
- def __init__(self, file):
- """This takes a file-like object for reading a pickle data stream.
- The protocol version of the pickle is detected automatically, so no
- proto argument is needed.
- The file-like object must have two methods, a read() method that
- takes an integer argument, and a readline() method that requires no
- arguments. Both methods should return a string. Thus file-like
- object can be a file object opened for reading, a StringIO object,
- or any other custom object that meets this interface.
- """
- self.readline = file.readline
- self.read = file.read
- self.memo = {}
- def load(self):
- """Read a pickled object representation from the open file.
- Return the reconstituted object hierarchy specified in the file.
- """
- self.mark = object() # any new unique object
- self.stack = []
- self.append = self.stack.append
- read = self.read
- dispatch = self.dispatch
- try:
- while 1:
- key = read(1)
- dispatch[key](self)
- except _Stop, stopinst:
- return stopinst.value
- # Return largest index k such that self.stack[k] is self.mark.
- # If the stack doesn't contain a mark, eventually raises IndexError.
- # This could be sped by maintaining another stack, of indices at which
- # the mark appears. For that matter, the latter stack would suffice,
- # and we wouldn't need to push mark objects on self.stack at all.
- # Doing so is probably a good thing, though, since if the pickle is
- # corrupt (or hostile) we may get a clue from finding self.mark embedded
- # in unpickled objects.
- def marker(self):
- stack = self.stack
- mark = self.mark
- k = len(stack)-1
- while stack[k] is not mark: k = k-1
- return k
- dispatch = {}
- def load_eof(self):
- raise EOFError
- dispatch[''] = load_eof
- def load_proto(self):
- proto = ord(self.read(1))
- if not 0 <= proto <= 2:
- raise ValueError, "unsupported pickle protocol: %d" % proto
- dispatch[PROTO] = load_proto
- def load_persid(self):
- pid = self.readline()[:-1]
- self.append(self.persistent_load(pid))
- dispatch[PERSID] = load_persid
- def load_binpersid(self):
- pid = self.stack.pop()
- self.append(self.persistent_load(pid))
- dispatch[BINPERSID] = load_binpersid
- def load_none(self):
- self.append(None)
- dispatch[NONE] = load_none
- def load_false(self):
- self.append(False)
- dispatch[NEWFALSE] = load_false
- def load_true(self):
- self.append(True)
- dispatch[NEWTRUE] = load_true
- def load_int(self):
- data = self.readline()
- if data == FALSE[1:]:
- val = False
- elif data == TRUE[1:]:
- val = True
- else:
- try:
- val = int(data)
- except ValueError:
- val = long(data)
- self.append(val)
- dispatch[INT] = load_int
- def load_binint(self):
- self.append(mloads('i' + self.read(4)))
- dispatch[BININT] = load_binint
- def load_binint1(self):
- self.append(ord(self.read(1)))
- dispatch[BININT1] = load_binint1
- def load_binint2(self):
- self.append(mloads('i' + self.read(2) + '\000\000'))
- dispatch[BININT2] = load_binint2
- def load_long(self):
- self.append(long(self.readline()[:-1], 0))
- dispatch[LONG] = load_long
- def load_long1(self):
- n = ord(self.read(1))
- bytes = self.read(n)
- self.append(decode_long(bytes))
- dispatch[LONG1] = load_long1
- def load_long4(self):
- n = mloads('i' + self.read(4))
- bytes = self.read(n)
- self.append(decode_long(bytes))
- dispatch[LONG4] = load_long4
- def load_float(self):
- self.append(float(self.readline()[:-1]))
- dispatch[FLOAT] = load_float
- def load_binfloat(self, unpack=struct.unpack):
- self.append(unpack('>d', self.read(8))[0])
- dispatch[BINFLOAT] = load_binfloat
- def load_string(self):
- rep = self.readline()[:-1]
- for q in "\"'": # double or single quote
- if rep.startswith(q):
- if not rep.endswith(q):
- raise ValueError, "insecure string pickle"
- rep = rep[len(q):-len(q)]
- break
- else:
- raise ValueError, "insecure string pickle"
- self.append(rep.decode("string-escape"))
- dispatch[STRING] = load_string
- def load_binstring(self):
- len = mloads('i' + self.read(4))
- self.append(self.read(len))
- dispatch[BINSTRING] = load_binstring
- def load_unicode(self):
- self.append(unicode(self.readline()[:-1],'raw-unicode-escape'))
- dispatch[UNICODE] = load_unicode
- def load_binunicode(self):
- len = mloads('i' + self.read(4))
- self.append(unicode(self.read(len),'utf-8'))
- dispatch[BINUNICODE] = load_binunicode
- def load_short_binstring(self):
- len = ord(self.read(1))
- self.append(self.read(len))
- dispatch[SHORT_BINSTRING] = load_short_binstring
- def load_tuple(self):
- k = self.marker()
- self.stack[k:] = [tuple(self.stack[k+1:])]
- dispatch[TUPLE] = load_tuple
- def load_empty_tuple(self):
- self.stack.append(())
- dispatch[EMPTY_TUPLE] = load_empty_tuple
- def load_tuple1(self):
- self.stack[-1] = (self.stack[-1],)
- dispatch[TUPLE1] = load_tuple1
- def load_tuple2(self):
- self.stack[-2:] = [(self.stack[-2], self.stack[-1])]
- dispatch[TUPLE2] = load_tuple2
- def load_tuple3(self):
- self.stack[-3:] = [(self.stack[-3], self.stack[-2], self.stack[-1])]
- dispatch[TUPLE3] = load_tuple3
- def load_empty_list(self):
- self.stack.append([])
- dispatch[EMPTY_LIST] = load_empty_list
- def load_empty_dictionary(self):
- self.stack.append({})
- dispatch[EMPTY_DICT] = load_empty_dictionary
- def load_list(self):
- k = self.marker()
- self.stack[k:] = [self.stack[k+1:]]
- dispatch[LIST] = load_list
- def load_dict(self):
- k = self.marker()
- d = {}
- items = self.stack[k+1:]
- for i in range(0, len(items), 2):
- key = items[i]
- value = items[i+1]
- d[key] = value
- self.stack[k:] = [d]
- dispatch[DICT] = load_dict
- # INST and OBJ differ only in how they get a class object. It's not
- # only sensible to do the rest in a common routine, the two routines
- # previously diverged and grew different bugs.
- # klass is the class to instantiate, and k points to the topmost mark
- # object, following which are the arguments for klass.__init__.
- def _instantiate(self, klass, k):
- args = tuple(self.stack[k+1:])
- del self.stack[k:]
- instantiated = 0
- if (not args and
- type(klass) is ClassType and
- not hasattr(klass, "__getinitargs__")):
- try:
- value = _EmptyClass()
- value.__class__ = klass
- instantiated = 1
- except RuntimeError:
- # In restricted execution, assignment to inst.__class__ is
- # prohibited
- pass
- if not instantiated:
- try:
- value = klass(*args)
- except TypeError, err:
- raise TypeError, "in constructor for %s: %s" % (
- klass.__name__, str(err)), sys.exc_info()[2]
- self.append(value)
- def load_inst(self):
- module = self.readline()[:-1]
- name = self.readline()[:-1]
- klass = self.find_class(module, name)
- self._instantiate(klass, self.marker())
- dispatch[INST] = load_inst
- def load_obj(self):
- # Stack is ... markobject classobject arg1 arg2 ...
- k = self.marker()
- klass = self.stack.pop(k+1)
- self._instantiate(klass, k)
- dispatch[OBJ] = load_obj
- def load_newobj(self):
- args = self.stack.pop()
- cls = self.stack[-1]
- obj = cls.__new__(cls, *args)
- self.stack[-1] = obj
- dispatch[NEWOBJ] = load_newobj
- def load_global(self):
- module = self.readline()[:-1]
- name = self.readline()[:-1]
- klass = self.find_class(module, name)
- self.append(klass)
- dispatch[GLOBAL] = load_global
- def load_ext1(self):
- code = ord(self.read(1))
- self.get_extension(code)
- dispatch[EXT1] = load_ext1
- def load_ext2(self):
- code = mloads('i' + self.read(2) + '\000\000')
- self.get_extension(code)
- dispatch[EXT2] = load_ext2
- def load_ext4(self):
- code = mloads('i' + self.read(4))
- self.get_extension(code)
- dispatch[EXT4] = load_ext4
- def get_extension(self, code):
- nil = []
- obj = _extension_cache.get(code, nil)
- if obj is not nil:
- self.append(obj)
- return
- key = _inverted_registry.get(code)
- if not key:
- raise ValueError("unregistered extension code %d" % code)
- obj = self.find_class(*key)
- _extension_cache[code] = obj
- self.append(obj)
- def find_class(self, module, name):
- # Subclasses may override this
- __import__(module)
- mod = sys.modules[module]
- klass = getattr(mod, name)
- return klass
- def load_reduce(self):
- stack = self.stack
- args = stack.pop()
- func = stack[-1]
- if args is None:
- # A hack for Jim Fulton's ExtensionClass, now deprecated
- warnings.warn("__basicnew__ special case is deprecated",
- DeprecationWarning)
- value = func.__basicnew__()
- else:
- value = func(*args)
- stack[-1] = value
- dispatch[REDUCE] = load_reduce
- def load_pop(self):
- del self.stack[-1]
- dispatch[POP] = load_pop
- def load_pop_mark(self):
- k = self.marker()
- del self.stack[k:]
- dispatch[POP_MARK] = load_pop_mark
- def load_dup(self):
- self.append(self.stack[-1])
- dispatch[DUP] = load_dup
- def load_get(self):
- self.append(self.memo[self.readline()[:-1]])
- dispatch[GET] = load_get
- def load_binget(self):
- i = ord(self.read(1))
- self.append(self.memo[repr(i)])
- dispatch[BINGET] = load_binget
- def load_long_binget(self):
- i = mloads('i' + self.read(4))
- self.append(self.memo[repr(i)])
- dispatch[LONG_BINGET] = load_long_binget
- def load_put(self):
- self.memo[self.readline()[:-1]] = self.stack[-1]
- dispatch[PUT] = load_put
- def load_binput(self):
- i = ord(self.read(1))
- self.memo[repr(i)] = self.stack[-1]
- dispatch[BINPUT] = load_binput
- def load_long_binput(self):
- i = mloads('i' + self.read(4))
- self.memo[repr(i)] = self.stack[-1]
- dispatch[LONG_BINPUT] = load_long_binput
- def load_append(self):
- stack = self.stack
- value = stack.pop()
- list = stack[-1]
- list.append(value)
- dispatch[APPEND] = load_append
- def load_appends(self):
- stack = self.stack
- mark = self.marker()
- list = stack[mark - 1]
- list.extend(stack[mark + 1:])
- del stack[mark:]
- dispatch[APPENDS] = load_appends
- def load_setitem(self):
- stack = self.stack
- value = stack.pop()
- key = stack.pop()
- dict = stack[-1]
- dict[key] = value
- dispatch[SETITEM] = load_setitem
- def load_setitems(self):
- stack = self.stack
- mark = self.marker()
- dict = stack[mark - 1]
- for i in range(mark + 1, len(stack), 2):
- dict[stack[i]] = stack[i + 1]
- del stack[mark:]
- dispatch[SETITEMS] = load_setitems
- def load_build(self):
- stack = self.stack
- state = stack.pop()
- inst = stack[-1]
- setstate = getattr(inst, "__setstate__", None)
- if setstate:
- setstate(state)
- return
- slotstate = None
- if isinstance(state, tuple) and len(state) == 2:
- state, slotstate = state
- if state:
- try:
- inst.__dict__.update(state)
- except RuntimeError:
- # XXX In restricted execution, the instance's __dict__
- # is not accessible. Use the old way of unpickling
- # the instance variables. This is a semantic
- # difference when unpickling in restricted
- # vs. unrestricted modes.
- # Note, however, that cPickle has never tried to do the
- # .update() business, and always uses
- # PyObject_SetItem(inst.__dict__, key, value) in a
- # loop over state.items().
- for k, v in state.items():
- setattr(inst, k, v)
- if slotstate:
- for k, v in slotstate.items():
- setattr(inst, k, v)
- dispatch[BUILD] = load_build
- def load_mark(self):
- self.append(self.mark)
- dispatch[MARK] = load_mark
- def load_stop(self):
- value = self.stack.pop()
- raise _Stop(value)
- dispatch[STOP] = load_stop
- # Helper class for load_inst/load_obj
- class _EmptyClass:
- pass
- # Encode/decode longs in linear time.
- import binascii as _binascii
- def encode_long(x):
- r"""Encode a long to a two's complement little-endian binary string.
- Note that 0L is a special case, returning an empty string, to save a
- byte in the LONG1 pickling context.
- >>> encode_long(0L)
- ''
- >>> encode_long(255L)
- '\xff\x00'
- >>> encode_long(32767L)
- '\xff\x7f'
- >>> encode_long(-256L)
- '\x00\xff'
- >>> encode_long(-32768L)
- '\x00\x80'
- >>> encode_long(-128L)
- '\x80'
- >>> encode_long(127L)
- '\x7f'
- >>>
- """
- if x == 0:
- return ''
- if x > 0:
- ashex = hex(x)
- assert ashex.startswith("0x")
- njunkchars = 2 + ashex.endswith('L')
- nibbles = len(ashex) - njunkchars
- if nibbles & 1:
- # need an even # of nibbles for unhexlify
- ashex = "0x0" + ashex[2:]
- elif int(ashex[2], 16) >= 8:
- # "looks negative", so need a byte of sign bits
- ashex = "0x00" + ashex[2:]
- else:
- # Build the 256's-complement: (1L << nbytes) + x. The trick is
- # to find the number of bytes in linear time (although that should
- # really be a constant-time task).
- ashex = hex(-x)
- assert ashex.startswith("0x")
- njunkchars = 2 + ashex.endswith('L')
- nibbles = len(ashex) - njunkchars
- if nibbles & 1:
- # Extend to a full byte.
- nibbles += 1
- nbits = nibbles * 4
- x += 1L << nbits
- assert x > 0
- ashex = hex(x)
- njunkchars = 2 + ashex.endswith('L')
- newnibbles = len(ashex) - njunkchars
- if newnibbles < nibbles:
- ashex = "0x" + "0" * (nibbles - newnibbles) + ashex[2:]
- if int(ashex[2], 16) < 8:
- # "looks positive", so need a byte of sign bits
- ashex = "0xff" + ashex[2:]
- if ashex.endswith('L'):
- ashex = ashex[2:-1]
- else:
- ashex = ashex[2:]
- assert len(ashex) & 1 == 0, (x, ashex)
- binary = _binascii.unhexlify(ashex)
- return binary[::-1]
- def decode_long(data):
- r"""Decode a long from a two's complement little-endian binary string.
- >>> decode_long('')
- 0L
- >>> decode_long("\xff\x00")
- 255L
- >>> decode_long("\xff\x7f")
- 32767L
- >>> decode_long("\x00\xff")
- -256L
- >>> decode_long("\x00\x80")
- -32768L
- >>> decode_long("\x80")
- -128L
- >>> decode_long("\x7f")
- 127L
- """
- nbytes = len(data)
- if nbytes == 0:
- return 0L
- ashex = _binascii.hexlify(data[::-1])
- n = long(ashex, 16) # quadratic time before Python 2.3; linear now
- if data[-1] >= '\x80':
- n -= 1L << (nbytes * 8)
- return n
- # Shorthands
- try:
- from cStringIO import StringIO
- except ImportError:
- from StringIO import StringIO
- def dump(obj, file, protocol=None, bin=None):
- Pickler(file, protocol, bin).dump(obj)
- def dumps(obj, protocol=None, bin=None):
- file = StringIO()
- Pickler(file, protocol, bin).dump(obj)
- return file.getvalue()
- def load(file):
- return Unpickler(file).load()
- def loads(str):
- file = StringIO(str)
- return Unpickler(file).load()
- # Doctest
- def _test():
- import doctest
- return doctest.testmod()
- if __name__ == "__main__":
- _test()
|