mimify.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464
  1. #! /usr/bin/env python
  2. """Mimification and unmimification of mail messages.
  3. Decode quoted-printable parts of a mail message or encode using
  4. quoted-printable.
  5. Usage:
  6. mimify(input, output)
  7. unmimify(input, output, decode_base64 = 0)
  8. to encode and decode respectively. Input and output may be the name
  9. of a file or an open file object. Only a readline() method is used
  10. on the input file, only a write() method is used on the output file.
  11. When using file names, the input and output file names may be the
  12. same.
  13. Interactive usage:
  14. mimify.py -e [infile [outfile]]
  15. mimify.py -d [infile [outfile]]
  16. to encode and decode respectively. Infile defaults to standard
  17. input and outfile to standard output.
  18. """
  # Configure
  MAXLEN = 200    # if lines longer than this, encode as quoted-printable
  CHARSET = 'ISO-8859-1'  # default charset for non-US-ASCII mail
  QUOTE = '> '            # string replies are quoted with
  # End configure

  import re

  # Names exported by "from mimify import *".
  __all__ = ["mimify","unmimify","mime_encode_header","mime_decode_header"]

  # All header patterns below are matched case-insensitively (re.I) against
  # the start of a (possibly continued) header line.

  # "Content-Transfer-Encoding: quoted-printable" header.
  qp = re.compile('^content-transfer-encoding:\\s*quoted-printable', re.I)
  # "Content-Transfer-Encoding: base64" header.
  base64_re = re.compile('^content-transfer-encoding:\\s*base64', re.I)
  # Multipart Content-Type header; group 1 is the boundary value (without
  # the optional opening double quote).  re.S lets '.' span folded lines.
  mp = re.compile('^content-type:.*multipart/.*boundary="?([^;"\n]*)', re.I|re.S)
  # Content-Type header carrying a quoted us-ascii / iso-8859-N charset.
  # Groups: 1 = text up to and including charset=", 2 = the charset name,
  # 3 = the closing quote and the rest of the header.
  chrset = re.compile('^(content-type:.*charset=")(us-ascii|iso-8859-[0-9]+)(".*)', re.I|re.S)
  # Header terminator: a line made of zero or more dashes and a newline
  # (so an empty line matches).
  he = re.compile('^-*\n')
  # One quoted-printable escape "=XX"; group 1 is the two hex digits.
  mime_code = re.compile('=([0-9a-f][0-9a-f])', re.I)
  # One encoded word "=?iso-8859-1?q?...?="; group 1 is the payload.
  mime_head = re.compile('=\\?iso-8859-1\\?q\\?([^? \t\n]+)\\?=', re.I)
  # Subject header of a reply ("Subject: Re: ...").
  repl = re.compile('^subject:\\s+re: ', re.I)
  class File:
      """Minimal file-like wrapper that stops at a multipart boundary.

      Only readline() is provided.  Once a boundary line (the boundary
      itself or the boundary followed by '--') has been read from the
      wrapped file, it is remembered in self.peek and every readline()
      call from then on reports end of file by returning ''.
      """

      def __init__(self, file, boundary):
          self.file = file            # wrapped object; only readline() is used
          self.boundary = boundary    # boundary string, or a false value for none
          self.peek = None            # the boundary line that ended this part

      def readline(self):
          # A boundary was already seen: this part is exhausted.
          if self.peek is not None:
              return ''
          line = self.file.readline()
          if line and self.boundary and \
             line in (self.boundary + '\n', self.boundary + '--\n'):
              # Keep the separator so the caller can inspect it via .peek.
              self.peek = line
              return ''
          return line
  class HeaderFile:
      """Reader that returns one logical header per readline() call.

      Continuation lines (those starting with a space or a tab) are
      appended to the header line they belong to.  The first line that is
      not a continuation is kept in self.peek and returned by the next
      call.  A line matching the header terminator pattern `he` is
      returned immediately, without reading ahead.
      """

      def __init__(self, file):
          self.file = file    # wrapped object; only readline() is used
          self.peek = None    # look-ahead line not yet handed to the caller

      def readline(self):
          # Take the pending look-ahead line, if there is one.
          line, self.peek = self.peek, None
          if line is None:
              line = self.file.readline()
          if not line or he.match(line):
              return line
          while True:
              following = self.file.readline()
              if not following or following[0] not in (' ', '\t'):
                  # End of input or start of the next header: save it.
                  self.peek = following
                  return line
              line += following
  def mime_decode(line):
      """Return line with each quoted-printable "=XX" escape replaced by the
      character whose code is the hexadecimal value XX."""
      def _unescape(match):
          return chr(int(match.group(1), 16))
      return mime_code.sub(_unescape, line)
  def mime_decode_header(line):
      """Return a header line with its "=?iso-8859-1?q?...?=" encoded words
      expanded to 8bit text."""
      def _decode_word(match):
          # Underscores stand for spaces; map them before the =XX escapes
          # are expanded so that an escaped underscore stays an underscore.
          return mime_decode(match.group(1).replace('_', ' '))
      return mime_head.sub(_decode_word, line)
  def unmimify_part(ifile, ofile, decode_base64 = 0):
      """Convert a quoted-printable part of a MIME mail message to 8bit.

      ifile is a File instance (its .boundary attribute and readline()
      method are used); ofile only needs a write() method.  The part's
      headers are read first, then its body.  Nested multipart sections
      are handled by calling this function recursively on a new File.
      If decode_base64 is true, base64 bodies are decoded as well.
      Returns None.
      """
      multipart = None        # '--' + boundary once a multipart header is seen
      quoted_printable = 0    # set when the part declares quoted-printable
      is_base64 = 0           # set when decode_base64 and the part is base64
      is_repl = 0             # set for "Subject: Re:" messages; only the
                              # disabled code in the body loop would use it
      # A boundary that itself starts with QUOTE means the lines of this
      # part are expected to carry the reply-quote prefix.
      if ifile.boundary and ifile.boundary[:2] == QUOTE:
          prefix = QUOTE
      else:
          prefix = ''

      # read header
      hfile = HeaderFile(ifile)
      while 1:
          line = hfile.readline()
          if not line:
              return
          # Strip the quote prefix for processing; pref remembers whether it
          # has to be put back when the line is written.
          if prefix and line[:len(prefix)] == prefix:
              line = line[len(prefix):]
              pref = prefix
          else:
              pref = ''
          line = mime_decode_header(line)
          if qp.match(line):
              quoted_printable = 1
              continue        # skip this header
          if decode_base64 and base64_re.match(line):
              is_base64 = 1
              continue        # this header is dropped from the output too
          ofile.write(pref + line)
          if not prefix and repl.match(line):
              # we're dealing with a reply message
              is_repl = 1
          mp_res = mp.match(line)
          if mp_res:
              multipart = '--' + mp_res.group(1)
          if he.match(line):
              # end of the header section
              break
      if is_repl and (quoted_printable or multipart):
          is_repl = 0

      # read body
      while 1:
          line = ifile.readline()
          if not line:
              return
          # Replace each =?iso-8859-1?q?...?= encoded word by its bare payload.
          line = re.sub(mime_head, '\\1', line)
          if prefix and line[:len(prefix)] == prefix:
              line = line[len(prefix):]
              pref = prefix
          else:
              pref = ''
  ##      if is_repl and len(line) >= 4 and line[:4] == QUOTE+'--' and line[-3:] != '--\n':
  ##          multipart = line[:-1]
          # Boundary handling.  The loop repeats because the line that ended
          # a nested part (nifile.peek) may itself be another boundary.
          while multipart:
              if line == multipart + '--\n':
                  # closing boundary: leave multipart mode
                  ofile.write(pref + line)
                  multipart = None
                  line = None
                  break
              if line == multipart + '\n':
                  # opening boundary: process the nested part recursively
                  ofile.write(pref + line)
                  nifile = File(ifile, multipart)
                  unmimify_part(nifile, ofile, decode_base64)
                  line = nifile.peek
                  if not line:
                      # premature end of file
                      break
                  continue
              # not a boundary between parts
              break
          if line and quoted_printable:
              # Join soft line breaks (a trailing '=') with the following
              # line, dropping a reply-quote prefix from the continuation.
              while line[-2:] == '=\n':
                  line = line[:-2]
                  newline = ifile.readline()
                  if newline[:len(QUOTE)] == QUOTE:
                      newline = newline[len(QUOTE):]
                  line = line + newline
              line = mime_decode(line)
          if line and is_base64 and not pref:
              import base64
              # NOTE(review): decodestring is the legacy spelling of this
              # API -- confirm it exists on the targeted Python version.
              line = base64.decodestring(line)
          if line:
              ofile.write(pref + line)
  def unmimify(infile, outfile, decode_base64 = 0):
      """Convert quoted-printable parts of a MIME mail message to 8bit.

      infile and outfile may each be a file name (a str) or an object
      that is used as an open file: only readline() is called on the
      input and only write() and flush() on the output.  When both are
      the same file name, the original file is renamed to ',<name>' in
      the same directory before the output file is created.
      decode_base64 is passed on to unmimify_part().

      Files opened by this function are closed before it returns, also
      when an exception is raised.  File objects supplied by the caller
      are flushed (output) but left open.  Returns None.
      """
      opened_in = opened_out = None   # handles this call is responsible for
      try:
          if isinstance(infile, str):
              ifile = opened_in = open(infile)
              if isinstance(outfile, str) and infile == outfile:
                  import os
                  # Move the original out of the way; the handle opened
                  # above is still used for reading.
                  d, f = os.path.split(infile)
                  os.rename(infile, os.path.join(d, ',' + f))
          else:
              ifile = infile
          if isinstance(outfile, str):
              ofile = opened_out = open(outfile, 'w')
          else:
              ofile = outfile
          nifile = File(ifile, None)
          unmimify_part(nifile, ofile, decode_base64)
          ofile.flush()
      finally:
          # Close only what was opened here.
          if opened_out is not None:
              opened_out.close()
          if opened_in is not None:
              opened_in.close()
  mime_char = re.compile('[=\177-\377]')  # quote these chars in body
  mime_header_char = re.compile('[=?\177-\377]')  # quote these in header

  def mime_encode(line, header):
      """Return line encoded as quoted-printable.

      Characters matched by mime_char (or by mime_header_char when header
      is true, which also covers '?') become "=XX" with upper-case hex
      digits.  A leading "From " has its 'F' escaped as well.  While the
      encoded text is 75 or more characters long, a piece is split off and
      terminated with a soft line break ("=" + newline).
      """
      reg = mime_char
      if header:
          reg = mime_header_char

      def _escape(match):
          return '=%02X' % ord(match.group(0))

      if line.startswith('From '):
          # quote 'From ' at the start of a line for stupid mailers
          encoded = '=%02X' % ord('F') + reg.sub(_escape, line[1:])
      else:
          encoded = reg.sub(_escape, line)

      pieces = []
      while len(encoded) >= 75:
          cut = 73
          # Back up so that the split never lands inside an =XX escape.
          while '=' in (encoded[cut], encoded[cut - 1]):
              cut -= 1
          cut += 1
          pieces.append(encoded[:cut] + '=\n')
          encoded = encoded[cut:]
      pieces.append(encoded)
      return ''.join(pieces)
  # A word containing at least one character in the range \177-\377.
  # Group 1 is the delimiter before it (space, tab, '(' or start of line),
  # group 2 the word; it must be followed by space, tab, ')' or newline.
  mime_header = re.compile('([ \t(]|^)([-a-zA-Z0-9_+]*[\177-\377][-a-zA-Z0-9_+\177-\377]*)(?=[ \t)]|\n)')

  def mime_encode_header(line):
      """Return a header line in which each word matched by mime_header is
      replaced by a "=?CHARSET?Q?...?=" encoded word."""
      def _encode_word(match):
          return '%s=?%s?Q?%s?=' % (match.group(1), CHARSET,
                                    mime_encode(match.group(2), 1))
      return mime_header.sub(_encode_word, line)
  # Start of a "Mime-Version:" header (case-insensitive).
  mv = re.compile('^mime-version:', re.I)
  # Start of any "Content-Transfer-Encoding:" header (case-insensitive).
  cte = re.compile('^content-transfer-encoding:', re.I)
  # A character in the range \177-\377.
  iso_char = re.compile('[\177-\377]')
  def mimify_part(ifile, ofile, is_mime):
      """Convert an 8bit part of a MIME mail message to quoted-printable.

      ifile supplies lines through readline(); ofile only needs write().
      is_mime tells whether a Mime-Version header is already known to
      apply (recursive calls for nested parts pass 1).  The headers and the
      body of the part are buffered first, because whether encoding is
      needed is only known after the whole body has been inspected.
      Nested multipart sections are processed recursively.  Returns None.
      """
      has_cte = is_qp = is_base64 = 0
      multipart = None        # '--' + boundary once a multipart header is seen
      must_quote_body = must_quote_header = has_iso_chars = 0

      header = []             # buffered header lines (terminator excluded)
      header_end = ''         # the line that terminated the headers
      message = []            # buffered body lines, decoded if they were q-p
      message_end = ''        # the boundary line that ended the body, if any
      # read header
      hfile = HeaderFile(ifile)
      while 1:
          line = hfile.readline()
          if not line:
              break
          if not must_quote_header and iso_char.search(line):
              must_quote_header = 1
          if mv.match(line):
              is_mime = 1
          if cte.match(line):
              has_cte = 1
              if qp.match(line):
                  is_qp = 1
              elif base64_re.match(line):
                  is_base64 = 1
          mp_res = mp.match(line)
          if mp_res:
              multipart = '--' + mp_res.group(1)
          if he.match(line):
              header_end = line
              break
          header.append(line)

      # read body
      while 1:
          line = ifile.readline()
          if not line:
              break
          if multipart:
              # Either kind of boundary line ends this part's own body.
              if line == multipart + '--\n':
                  message_end = line
                  break
              if line == multipart + '\n':
                  message_end = line
                  break
          if is_base64:
              # base64 bodies are copied through without inspection
              message.append(line)
              continue
          if is_qp:
              # Undo existing quoted-printable first: join soft line breaks
              # (dropping a reply-quote prefix on continuations), then decode.
              while line[-2:] == '=\n':
                  line = line[:-2]
                  newline = ifile.readline()
                  if newline[:len(QUOTE)] == QUOTE:
                      newline = newline[len(QUOTE):]
                  line = line + newline
              line = mime_decode(line)
          message.append(line)
          if not has_iso_chars:
              if iso_char.search(line):
                  has_iso_chars = must_quote_body = 1
          if not must_quote_body:
              # over-long lines also force quoted-printable
              if len(line) > MAXLEN:
                  must_quote_body = 1

      # convert and output header and body
      for line in header:
          if must_quote_header:
              line = mime_encode_header(line)
          chrset_res = chrset.match(line)
          if chrset_res:
              if has_iso_chars:
                  # change us-ascii into iso-8859-1
                  if chrset_res.group(2).lower() == 'us-ascii':
                      line = '%s%s%s' % (chrset_res.group(1),
                                         CHARSET,
                                         chrset_res.group(3))
              else:
                  # change iso-8859-* into us-ascii
                  line = '%sus-ascii%s' % chrset_res.group(1, 3)
          if has_cte and cte.match(line):
              # Rewrite the existing header to name the encoding being emitted.
              line = 'Content-Transfer-Encoding: '
              if is_base64:
                  line = line + 'base64\n'
              elif must_quote_body:
                  line = line + 'quoted-printable\n'
              else:
                  line = line + '7bit\n'
          ofile.write(line)
      if (must_quote_header or must_quote_body) and not is_mime:
          # Something gets encoded but no Mime-Version header was seen:
          # add the MIME headers.
          ofile.write('Mime-Version: 1.0\n')
          ofile.write('Content-Type: text/plain; ')
          if has_iso_chars:
              ofile.write('charset="%s"\n' % CHARSET)
          else:
              ofile.write('charset="us-ascii"\n')
      if must_quote_body and not has_cte:
          ofile.write('Content-Transfer-Encoding: quoted-printable\n')
      ofile.write(header_end)

      for line in message:
          if must_quote_body:
              line = mime_encode(line, 0)
          ofile.write(line)
      ofile.write(message_end)

      # Continue with whatever follows the boundary that ended the body.
      line = message_end
      while multipart:
          if line == multipart + '--\n':
              # read bit after the end of the last part
              while 1:
                  line = ifile.readline()
                  if not line:
                      return
                  if must_quote_body:
                      line = mime_encode(line, 0)
                  ofile.write(line)
          if line == multipart + '\n':
              # another part follows: convert it recursively
              nifile = File(ifile, multipart)
              mimify_part(nifile, ofile, 1)
              line = nifile.peek
              if not line:
                  # premature end of file
                  break
              ofile.write(line)
              continue
          # unexpectedly no multipart separator--copy rest of file
          while 1:
              line = ifile.readline()
              if not line:
                  return
              if must_quote_body:
                  line = mime_encode(line, 0)
              ofile.write(line)
  def mimify(infile, outfile):
      """Convert 8bit parts of a MIME mail message to quoted-printable.

      infile and outfile may each be a file name (a str) or an object
      that is used as an open file: only readline() is called on the
      input and only write() and flush() on the output.  When both are
      the same file name, the original file is renamed to ',<name>' in
      the same directory before the output file is created.

      Files opened by this function are closed before it returns, also
      when an exception is raised.  File objects supplied by the caller
      are flushed (output) but left open.  Returns None.
      """
      opened_in = opened_out = None   # handles this call is responsible for
      try:
          if isinstance(infile, str):
              ifile = opened_in = open(infile)
              if isinstance(outfile, str) and infile == outfile:
                  import os
                  # Move the original out of the way; the handle opened
                  # above is still used for reading.
                  d, f = os.path.split(infile)
                  os.rename(infile, os.path.join(d, ',' + f))
          else:
              ifile = infile
          if isinstance(outfile, str):
              ofile = opened_out = open(outfile, 'w')
          else:
              ofile = outfile
          nifile = File(ifile, None)
          mimify_part(nifile, ofile, 0)
          ofile.flush()
      finally:
          # Close only what was opened here.
          if opened_out is not None:
              opened_out.close()
          if opened_in is not None:
              opened_in.close()
  396. import sys
  if __name__ == '__main__' or (len(sys.argv) > 0 and sys.argv[0] == 'mimify'):
      # Command-line driver.  Exactly one of -e (encode) and -d (decode)
      # must be given; -b (also decode base64) is only accepted together
      # with -d; -l sets MAXLEN.  Up to two positional arguments name the
      # input and output files, defaulting to stdin and stdout.
      import getopt
      usage = 'Usage: mimify [-l len] -[ed] [infile [outfile]]'

      decode_base64 = 0
      try:
          opts, args = getopt.getopt(sys.argv[1:], 'l:edb')
      except getopt.GetoptError:
          # Unknown option or missing argument to -l: show the usage text
          # instead of a traceback.
          print(usage)
          sys.exit(1)
      if len(args) not in (0, 1, 2):
          print(usage)
          sys.exit(1)
      if (('-e', '') in opts) == (('-d', '') in opts) or \
         ((('-b', '') in opts) and (('-d', '') not in opts)):
          print(usage)
          sys.exit(1)
      for o, a in opts:
          if o == '-e':
              encode = mimify
          elif o == '-d':
              encode = unmimify
          elif o == '-l':
              try:
                  # Rebinds the module-level MAXLEN read by mimify_part().
                  MAXLEN = int(a)
              except (ValueError, OverflowError):
                  print(usage)
                  sys.exit(1)
          elif o == '-b':
              decode_base64 = 1
      if len(args) == 0:
          encode_args = (sys.stdin, sys.stdout)
      elif len(args) == 1:
          encode_args = (args[0], sys.stdout)
      else:
          encode_args = (args[0], args[1])
      if decode_base64:
          # Only unmimify() takes the extra argument; -b without -d was
          # rejected above.
          encode_args = encode_args + (decode_base64,)
      encode(*encode_args)