123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324 |
- r"""File-like objects that read from or write to a string buffer.
- This implements (nearly) all stdio methods.
- f = StringIO() # ready for writing
- f = StringIO(buf) # ready for reading
- f.close() # explicitly release resources held
- flag = f.isatty() # always false
- pos = f.tell() # get current position
- f.seek(pos) # set current position
- f.seek(pos, mode) # mode 0: absolute; 1: relative; 2: relative to EOF
- buf = f.read() # read until EOF
- buf = f.read(n) # read up to n bytes
- buf = f.readline() # read until end of line ('\n') or EOF
- list = f.readlines()# list of f.readline() results until EOF
- f.truncate([size]) # truncate file to at most size (default: current pos)
- f.write(buf) # write at current position
- f.writelines(list) # for line in list: f.write(line)
- f.getvalue() # return whole file's contents as a string
- Notes:
- - Using a real file is often faster (but less convenient).
- - There's also a much faster implementation in C, called cStringIO, but
- it's not subclassable.
- - fileno() is left unimplemented so that code which uses it triggers
- an exception early.
- - Seeking far beyond EOF and then writing will insert real null
- bytes that occupy space in the buffer.
- - There's a simple test set (see end of this file).
- """
- try:
- from errno import EINVAL
- except ImportError:
- EINVAL = 22
- __all__ = ["StringIO"]
- def _complain_ifclosed(closed):
- if closed:
- raise ValueError, "I/O operation on closed file"
class StringIO:
    """class StringIO([buffer])

    In-memory file-like object that reads from and writes to a string.

    When a StringIO object is created, it can be initialized to an existing
    string by passing the string to the constructor.  If no string is given,
    the StringIO will start empty.

    The StringIO object can accept either Unicode or 8-bit strings, but
    mixing the two may take some care.  If both are used, 8-bit strings that
    cannot be interpreted as 7-bit ASCII (that use the 8th bit) will cause
    a UnicodeError to be raised when getvalue() is called.

    Implementation notes: written data is queued in ``self.buflist`` and is
    only joined into ``self.buf`` when the contents are actually needed.
    ``self.len`` is the total logical length (``self.buf`` plus any queued
    pieces) and ``self.pos`` is the current file position.
    """

    def __init__(self, buf=''):
        """Create the object, optionally pre-loaded with *buf*.

        A *buf* that is not a str/unicode instance is converted with str().
        The position starts at 0, so initial contents are ready for reading.
        """
        # Force self.buf to be a string or unicode
        if not isinstance(buf, basestring):
            buf = str(buf)
        self.buf = buf
        self.len = len(buf)
        self.buflist = []       # written pieces not yet merged into self.buf
        self.pos = 0
        self.closed = False
        self.softspace = 0      # part of the file-object interface

    def __iter__(self):
        """Return self: the object is its own line iterator."""
        return self

    def next(self):
        """Return the next line, or raise StopIteration at EOF.

        A file object is its own iterator, so ``for line in f`` calls this
        method repeatedly.  A closed object is treated as exhausted and
        raises StopIteration as well.
        """
        if self.closed:
            raise StopIteration
        r = self.readline()
        if not r:
            raise StopIteration
        return r

    def close(self):
        """Mark the object closed and release the memory buffer.

        Closing an already closed object is a no-op.
        """
        if not self.closed:
            self.closed = True
            del self.buf, self.pos

    def isatty(self):
        """Return False: StringIO objects are not connected to a tty-like
        device.

        Raises ValueError if the object is closed.
        """
        _complain_ifclosed(self.closed)
        return False

    def _consolidate(self):
        """Merge any queued write pieces into self.buf and clear the queue."""
        if self.buflist:
            self.buf += ''.join(self.buflist)
            self.buflist = []

    def seek(self, pos, mode=0):
        """Set the file's current position.

        The mode argument is optional and defaults to 0 (absolute file
        positioning); other values are 1 (seek relative to the current
        position) and 2 (seek relative to the file's end).  A resulting
        position below zero is clamped to 0.

        There is no return value.  Raises ValueError if the object is
        closed.
        """
        _complain_ifclosed(self.closed)
        self._consolidate()
        if mode == 1:
            pos += self.pos
        elif mode == 2:
            pos += self.len
        self.pos = max(0, pos)

    def tell(self):
        """Return the file's current position.

        Raises ValueError if the object is closed.
        """
        _complain_ifclosed(self.closed)
        return self.pos

    def read(self, n=-1):
        """Read at most n bytes from the file
        (less if the read hits EOF before obtaining n bytes).

        If n is negative, None or omitted, read all data until EOF is
        reached.  The bytes are returned as a string object.  An empty
        string is returned when EOF is encountered immediately.

        Raises ValueError if the object is closed.
        """
        _complain_ifclosed(self.closed)
        self._consolidate()
        # None is handled explicitly rather than through the Python 2
        # ordering rule that happens to make ``None < 0`` true.
        if n is None or n < 0:
            newpos = self.len
        else:
            newpos = min(self.pos + n, self.len)
        r = self.buf[self.pos:newpos]
        self.pos = newpos
        return r

    def readline(self, length=None):
        """Read one entire line from the file.

        A trailing newline character is kept in the string (but may be absent
        when a file ends with an incomplete line).  If the length argument is
        present and non-negative, it is a maximum byte count (including the
        trailing newline) and an incomplete line may be returned.  A negative
        length is ignored, i.e. a whole line is read.

        An empty string is returned only when EOF is encountered immediately.

        Note: Unlike stdio's fgets(), the returned string contains null
        characters ('\\0') if they occurred in the input.

        Raises ValueError if the object is closed.
        """
        _complain_ifclosed(self.closed)
        self._consolidate()
        i = self.buf.find('\n', self.pos)
        if i < 0:
            newpos = self.len
        else:
            newpos = i + 1
        # Only a non-negative length limits the read.  Applying a negative
        # one would move newpos *before* the current position, returning a
        # wrong slice and leaving a negative file position behind.
        if length is not None and length >= 0:
            if self.pos + length < newpos:
                newpos = self.pos + length
        r = self.buf[self.pos:newpos]
        self.pos = newpos
        return r

    def readlines(self, sizehint=0):
        """Read until EOF using readline() and return a list containing the
        lines thus read.

        If the optional sizehint argument is present and positive, instead
        of reading up to EOF, whole lines totalling approximately sizehint
        bytes (or more, to accommodate a final whole line) are read.
        """
        total = 0
        lines = []
        line = self.readline()
        while line:
            lines.append(line)
            total += len(line)
            if 0 < sizehint <= total:
                break
            line = self.readline()
        return lines

    def truncate(self, size=None):
        """Truncate the file's size.

        If the optional size argument is present, the file is truncated to
        (at most) that size.  The size defaults to the current position.
        The current file position is not changed unless the position
        is beyond the new file size.

        If the specified size exceeds the file's current size, the
        file remains unchanged.

        Raises ValueError if the object is closed and IOError (EINVAL) if
        size is negative.
        """
        _complain_ifclosed(self.closed)
        if size is None:
            size = self.pos
        elif size < 0:
            raise IOError(EINVAL, "Negative size not allowed")
        elif size < self.pos:
            self.pos = size
        self.buf = self.getvalue()[:size]
        # Record the real length of what is left.  Storing ``size`` directly
        # would overstate the length whenever size exceeds the contents,
        # and a later write at that position would then skip null padding.
        self.len = len(self.buf)

    def write(self, s):
        """Write a string to the file at the current position.

        Existing data at that position is overwritten.  Writing at a
        position beyond the end first pads the gap with null characters.
        A non-string s is converted with str().

        There is no return value.  Raises ValueError if the object is
        closed.
        """
        _complain_ifclosed(self.closed)
        # Note: this check runs before the str() conversion, so any falsy
        # argument (not just the empty string) is silently ignored.
        if not s:
            return
        # Force s to be a string or unicode
        if not isinstance(s, basestring):
            s = str(s)
        spos = self.pos
        slen = self.len
        if spos == slen:
            # Fast path: appending at EOF only needs to queue the piece.
            self.buflist.append(s)
            self.len = self.pos = spos + len(s)
            return
        if spos > slen:
            # Position is past EOF: fill the gap with real null characters.
            self.buflist.append('\0' * (spos - slen))
            slen = spos
        newpos = spos + len(s)
        if spos < slen:
            # Overwrite inside existing data: splice s between the part
            # before the position and whatever remains after the new text.
            self._consolidate()
            self.buflist = [self.buf[:spos], s, self.buf[newpos:]]
            self.buf = ''
            if newpos > slen:
                slen = newpos
        else:
            self.buflist.append(s)
            slen = newpos
        self.len = slen
        self.pos = newpos

    def writelines(self, iterable):
        """Write a sequence of strings to the file.

        The sequence can be any iterable object producing strings, typically
        a list of strings.  There is no return value.

        (The name is intended to match readlines(); writelines() does not add
        line separators.)
        """
        write = self.write
        for line in iterable:
            write(line)

    def flush(self):
        """Flush the internal buffer.

        Nothing needs flushing for an in-memory object; the call only
        raises ValueError if the object is closed.
        """
        _complain_ifclosed(self.closed)

    def getvalue(self):
        """Return the entire contents of the "file" as a single string.

        This can be called at any time before the StringIO object's close()
        method is called; after close() the buffer has been deleted and the
        call fails with AttributeError.

        The StringIO object can accept either Unicode or 8-bit strings,
        but mixing the two may take some care.  If both are used, 8-bit
        strings that cannot be interpreted as 7-bit ASCII (that use the
        8th bit) will cause a UnicodeError to be raised when getvalue()
        is called.
        """
        self._consolidate()
        return self.buf
- # A little test suite
def test():
    """Exercise StringIO against the contents of a real text file.

    The file is named by the first command-line argument and defaults to
    '/etc/passwd'.  The lines are written into a StringIO, then read back
    through seek/tell/read/readline/readlines/truncate, printing progress
    along the way.  Raises RuntimeError as soon as a check fails.
    """
    import sys

    if sys.argv[1:]:
        path = sys.argv[1]
    else:
        path = '/etc/passwd'

    # Read the file once and close it deterministically instead of leaving
    # two anonymous file objects for the garbage collector.
    source = open(path, 'r')
    try:
        lines = source.readlines()
    finally:
        source.close()
    text = ''.join(lines)

    f = StringIO()
    for line in lines[:-2]:
        f.write(line)
    f.writelines(lines[-2:])
    if f.getvalue() != text:
        raise RuntimeError('write failed')
    length = f.tell()
    print('File length = %s' % (length,))

    # Overwrite the second line in place with itself, then re-read.
    f.seek(len(lines[0]))
    f.write(lines[1])
    f.seek(0)
    print('First line = %s' % (repr(f.readline()),))
    print('Position = %s' % (f.tell(),))
    line = f.readline()
    print('Second line = %s' % (repr(line),))

    # A relative seek backwards must land on the start of the same line.
    f.seek(-len(line), 1)
    line2 = f.read(len(line))
    if line != line2:
        raise RuntimeError('bad result after seek back')

    f.seek(len(line2), 1)
    rest = f.readlines()
    line = rest[-1]
    f.seek(f.tell() - len(line))
    line2 = f.read()
    if line != line2:
        raise RuntimeError('bad result after seek back from EOF')
    print('Read %s more lines' % (len(rest),))
    print('File length = %s' % (f.tell(),))
    if f.tell() != length:
        raise RuntimeError('bad length')

    # Floor division keeps the size an integer under any division rules.
    half = length // 2
    f.truncate(half)
    f.seek(0, 2)
    print('Truncated length = %s' % (f.tell(),))
    if f.tell() != half:
        raise RuntimeError('truncate did not adjust length')
    f.close()


if __name__ == '__main__':
    test()
|