mailbox.py 9.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321
  1. #! /usr/bin/env python
  2. """Classes to handle Unix style, MMDF style, and MH style mailboxes."""
  3. import rfc822
  4. import os
  # Names exported by a star-import of this module; the underscore-prefixed
  # helper classes and the _test() driver are intentionally not listed.
  __all__ = ["UnixMailbox","MmdfMailbox","MHMailbox","Maildir","BabylMailbox",
             "PortableUnixMailbox"]
  class _Mailbox:
      """Base class for mailboxes that keep all messages in one seekable file.

      Subclasses provide two hooks that operate on ``self.fp``:

      * ``_search_start()`` leaves the file positioned at the first byte of
        the next message, or raises EOFError when no message is left.
      * ``_search_end()`` leaves the file positioned just past the last byte
        of that message.
      """

      def __init__(self, fp, factory=rfc822.Message):
          # fp:      the open mailbox file; it must support seek() and tell().
          # factory: callable that turns a file-like object into a message.
          self.fp = fp
          self.factory = factory
          # Offset in fp at which the search for the next message resumes.
          self.seekp = 0

      def __iter__(self):
          # Keep calling next() until it returns the None sentinel.
          return iter(self.next, None)

      def next(self):
          """Return the next message, or None once the mailbox is exhausted.

          The message is produced by calling the factory on a _Subfile that
          exposes only the message's own region of the mailbox file.
          """
          while True:
              # Other code may have moved fp since the previous call, so
              # always restart from the remembered offset.
              self.fp.seek(self.seekp)
              try:
                  self._search_start()
              except EOFError:
                  self.seekp = self.fp.tell()
                  return None
              begin = self.fp.tell()
              self._search_end()
              end = self.fp.tell()
              self.seekp = end
              # A zero-length region is not a message; look for the next one.
              if begin != end:
                  return self.factory(_Subfile(self.fp, begin, end))
  class _Subfile:
      """Read-only, file-like window onto the region [start, stop) of a file.

      Positions reported by tell() and accepted by seek() are relative to
      ``start``.  The window keeps its own position in ``self.pos`` and
      re-seeks the shared underlying file before every read, so several
      windows can share one file object.
      """

      def __init__(self, fp, start, stop):
          # fp:    the underlying file; must support seek(), tell(), read()
          #        and readline().
          # start: absolute offset of the first byte of the window.
          # stop:  absolute offset one past the last byte of the window.
          self.fp = fp
          self.start = start
          self.stop = stop
          self.pos = self.start

      def _read(self, length, read_function):
          """Read via read_function without running past the window's end.

          ``length`` of None, a negative value, or anything larger than what
          is left is reduced to the number of bytes remaining.  Returns ''
          once the position has reached ``stop``.
          """
          if self.pos >= self.stop:
              return ''
          remaining = self.stop - self.pos
          if length is None or length < 0 or length > remaining:
              length = remaining
          self.fp.seek(self.pos)
          data = read_function(length)
          # Trust the underlying file for how far the read actually got.
          self.pos = self.fp.tell()
          return data

      def read(self, length = None):
          """Read up to ``length`` bytes (default: the rest of the window)."""
          return self._read(length, self.fp.read)

      def readline(self, length = None):
          """Read one line, cut short if the window ends before the newline."""
          return self._read(length, self.fp.readline)

      def readlines(self, sizehint = -1):
          """Return a list of the remaining lines in the window.

          With a non-negative ``sizehint``, reading stops after the line that
          brings the total length read up to ``sizehint``; at least one line
          is read if any is available.  A negative value reads everything.
          """
          lines = []
          while True:
              line = self.readline()
              if not line:
                  break
              lines.append(line)
              if sizehint >= 0:
                  sizehint = sizehint - len(line)
                  if sizehint <= 0:
                      break
          return lines

      def tell(self):
          """Return the current position relative to the window's start."""
          return self.pos - self.start

      def seek(self, pos, whence=0):
          """Move the position; whence is 0 (start), 1 (current) or 2 (end).

          Any other ``whence`` leaves the position unchanged.  No bounds
          checking is done on the resulting position.
          """
          if whence == 0:
              self.pos = self.start + pos
          elif whence == 1:
              self.pos = self.pos + pos
          elif whence == 2:
              self.pos = self.stop + pos

      def close(self):
          """Drop the reference to the underlying file without closing it.

          Safe to call more than once: a second call used to raise
          AttributeError because the attribute had already been deleted.
          """
          try:
              del self.fp
          except AttributeError:
              pass
  # Recommended to use PortableUnixMailbox instead!
  class UnixMailbox(_Mailbox):
      """Mailbox in Unix format, where a "From " line opens each message."""

      def _search_start(self):
          # Leave fp at the beginning of the next accepted "From " line;
          # raise EOFError if the file ends first.
          while True:
              here = self.fp.tell()
              line = self.fp.readline()
              if not line:
                  raise EOFError
              if line.startswith('From ') and self._isrealfromline(line):
                  self.fp.seek(here)
                  return

      def _search_end(self):
          # Skip the "From " line that opens the current message, then leave
          # fp at the next accepted "From " line, or at end of file.
          self.fp.readline()
          while True:
              here = self.fp.tell()
              line = self.fp.readline()
              if not line:
                  return
              if line.startswith('From ') and self._isrealfromline(line):
                  self.fp.seek(here)
                  return

      # An overridable mechanism to test for From-line-ness.  You can either
      # specify a different regular expression in _fromlinepattern or define
      # a whole new _isrealfromline() method.  Note that the test is only
      # applied to lines that already start with the 5 characters "From ".
      #
      # BAW: According to
      #http://home.netscape.com/eng/mozilla/2.0/relnotes/demo/content-length.html
      # the only portable, reliable way to find message delimiters in a BSD
      # (i.e. Unix mailbox) style folder is to search for "\n\nFrom .*\n", or
      # at the beginning of the file, "^From .*\n".  While _fromlinepattern
      # below seems like a good idea, in practice there are too many
      # variations for stricter parsing of the line to be completely accurate.
      #
      # _strict_isrealfromline() is the old version, which tries to do
      # stricter parsing of the From_ line.  _portable_isrealfromline()
      # simply returns true, since it is never called if the line doesn't
      # already start with "From ".
      #
      # This algorithm, and the way it interacts with _search_start() and
      # _search_end(), may not be completely correct, because it doesn't
      # check that the two characters preceding "From " are \n\n or the
      # beginning of the file.  Fixing this would require a more extensive
      # rewrite than is necessary.  For convenience there is a
      # PortableUnixMailbox class, which selects the lenient
      # _portable_isrealfromline() test instead of the regular expression.
      _fromlinepattern = r"From \s*[^\s]+\s+\w\w\w\s+\w\w\w\s+\d?\d\s+" \
                         r"\d?\d:\d\d(:\d\d)?(\s+[^\s]+)?\s+\d\d\d\d\s*$"
      _regexp = None

      def _strict_isrealfromline(self, line):
          # Compile the pattern on first use and remember it on the instance.
          # Returns the match object, or None when the line does not match.
          pattern = self._regexp
          if not pattern:
              import re
              pattern = self._regexp = re.compile(self._fromlinepattern)
          return pattern.match(line)

      def _portable_isrealfromline(self, line):
          # Every line handed to this test already starts with "From ".
          return True

      _isrealfromline = _strict_isrealfromline
  class PortableUnixMailbox(UnixMailbox):
      # Same as UnixMailbox, except that every line starting with "From " is
      # accepted as a message delimiter: the always-true test replaces the
      # strict regular-expression test that UnixMailbox uses by default.
      _isrealfromline = UnixMailbox._portable_isrealfromline
  class MmdfMailbox(_Mailbox):
      """Mailbox in MMDF format.

      Messages are delimited by lines consisting of four 0x01 (Ctrl-A)
      characters followed by a newline.
      """

      def _search_start(self):
          # Consume lines up to and including the next delimiter line, so
          # that fp is left on the first line of the message itself.
          # Raise EOFError if the file ends before a delimiter is seen.
          while True:
              line = self.fp.readline()
              if not line:
                  raise EOFError
              if line.startswith('\001\001\001\001\n'):
                  return

      def _search_end(self):
          # Advance to the next delimiter line and leave fp at its beginning
          # (the delimiter is not part of the message), or stop at end of file.
          while True:
              mark = self.fp.tell()
              line = self.fp.readline()
              if not line:
                  return
              if line == '\001\001\001\001\n':
                  self.fp.seek(mark)
                  return
  146. class MHMailbox:
  147. def __init__(self, dirname, factory=rfc822.Message):
  148. import re
  149. pat = re.compile('^[1-9][0-9]*$')
  150. self.dirname = dirname
  151. # the three following lines could be combined into:
  152. # list = map(long, filter(pat.match, os.listdir(self.dirname)))
  153. list = os.listdir(self.dirname)
  154. list = filter(pat.match, list)
  155. list = map(long, list)
  156. list.sort()
  157. # This only works in Python 1.6 or later;
  158. # before that str() added 'L':
  159. self.boxes = map(str, list)
  160. self.boxes.reverse()
  161. self.factory = factory
  162. def __iter__(self):
  163. return iter(self.next, None)
  164. def next(self):
  165. if not self.boxes:
  166. return None
  167. fn = self.boxes.pop()
  168. fp = open(os.path.join(self.dirname, fn))
  169. msg = self.factory(fp)
  170. try:
  171. msg._mh_msgno = fn
  172. except (AttributeError, TypeError):
  173. pass
  174. return msg
  175. class Maildir:
  176. # Qmail directory mailbox
  177. def __init__(self, dirname, factory=rfc822.Message):
  178. self.dirname = dirname
  179. self.factory = factory
  180. # check for new mail
  181. newdir = os.path.join(self.dirname, 'new')
  182. boxes = [os.path.join(newdir, f)
  183. for f in os.listdir(newdir) if f[0] != '.']
  184. # Now check for current mail in this maildir
  185. curdir = os.path.join(self.dirname, 'cur')
  186. boxes += [os.path.join(curdir, f)
  187. for f in os.listdir(curdir) if f[0] != '.']
  188. boxes.reverse()
  189. self.boxes = boxes
  190. def __iter__(self):
  191. return iter(self.next, None)
  192. def next(self):
  193. if not self.boxes:
  194. return None
  195. fn = self.boxes.pop()
  196. fp = open(fn)
  197. return self.factory(fp)
  class BabylMailbox(_Mailbox):
      """Mailbox in Babyl format (see the delimiter lines tested below)."""

      def _search_start(self):
          # Consume lines up to and including the next '*** EOOH ***' line,
          # so that fp is left on the line that follows it.  Raise EOFError
          # if the file ends before such a line is seen.
          while True:
              line = self.fp.readline()
              if not line:
                  raise EOFError
              if line == '*** EOOH ***\n':
                  return

      def _search_end(self):
          # The message ends at a line holding \037\014 (0x1F, form feed), or
          # at a bare \037 with no newline; fp is left at the start of that
          # line.  Reaching end of file also ends the message.
          while True:
              mark = self.fp.tell()
              line = self.fp.readline()
              if not line:
                  return
              if line in ('\037\014\n', '\037'):
                  self.fp.seek(mark)
                  return
  def _test():
      """Command-line self-test: list a mailbox or print one message body.

      Usage: [mailbox [message-number]]

      Without arguments the mailbox is taken from the first of $MAILDIR,
      $MAIL, $LOGNAME and $USER that is set.  A name starting with '+' is
      looked up under $HOME/Mail/; a name without a slash is looked up in
      /var/mail and then /usr/mail.  A directory containing 'cur' is read as
      a Maildir, any other directory as an MH folder, and a plain file as a
      Unix mailbox.  With a message number (counted from 1) the body of that
      message is written to stdout; otherwise one summary line per message
      is printed.
      """
      import sys

      args = sys.argv[1:]
      if not args:
          for key in 'MAILDIR', 'MAIL', 'LOGNAME', 'USER':
              if key in os.environ:
                  mbox = os.environ[key]
                  break
          else:
              # Name every variable that was actually consulted above.
              print("$MAILDIR, $MAIL, $LOGNAME nor $USER set -- who are you?")
              return
      else:
          mbox = args[0]
      if mbox[:1] == '+':
          mbox = os.environ['HOME'] + '/Mail/' + mbox[1:]
      elif '/' not in mbox:
          if os.path.isfile('/var/mail/' + mbox):
              mbox = '/var/mail/' + mbox
          else:
              mbox = '/usr/mail/' + mbox
      if os.path.isdir(mbox):
          if os.path.isdir(os.path.join(mbox, 'cur')):
              mb = Maildir(mbox)
          else:
              mb = MHMailbox(mbox)
      else:
          fp = open(mbox, 'r')
          mb = PortableUnixMailbox(fp)

      msgs = []
      while True:
          msg = mb.next()
          if msg is None:
              break
          msgs.append(msg)
          if len(args) <= 1:
              # Only headers are needed for the summary, so drop the
              # message's reference to its file object.
              msg.fp = None
      if len(args) > 1:
          num = int(args[1])
          print('Message %d body:' % num)
          msg = msgs[num-1]
          msg.rewindbody()
          sys.stdout.write(msg.fp.read())
      else:
          print('Mailbox %s has %d messages:' % (mbox, len(msgs)))
          for msg in msgs:
              f = msg.getheader('from') or ""
              s = msg.getheader('subject') or ""
              d = msg.getheader('date') or ""
              # d[5:] drops the first five characters of the Date header.
              print('-%20.20s %20.20s %-30.30s' % (f, d[5:], s))
  # Run the command-line self-test only when this file is executed as a
  # script, not when it is imported as a module.
  if __name__ == '__main__':
      _test()