123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464 |
- #! /usr/bin/env python
- """Mimification and unmimification of mail messages.
- Decode quoted-printable parts of a mail message or encode using
- quoted-printable.
- Usage:
- mimify(input, output)
- unmimify(input, output, decode_base64 = 0)
- to encode and decode respectively. Input and output may be the name
- of a file or an open file object. Only a readline() method is used
- on the input file, only a write() method is used on the output file.
- When using file names, the input and output file names may be the
- same.
- Interactive usage:
- mimify.py -e [infile [outfile]]
- mimify.py -d [infile [outfile]]
- to encode and decode respectively. Infile defaults to standard
- input and outfile to standard output.
- """
- # Configure
- MAXLEN = 200 # if lines longer than this, encode as quoted-printable
- CHARSET = 'ISO-8859-1' # default charset for non-US-ASCII mail
- QUOTE = '> ' # string replies are quoted with
- # End configure
- import re
- __all__ = ["mimify","unmimify","mime_encode_header","mime_decode_header"]
- qp = re.compile('^content-transfer-encoding:\\s*quoted-printable', re.I)
- base64_re = re.compile('^content-transfer-encoding:\\s*base64', re.I)
- mp = re.compile('^content-type:.*multipart/.*boundary="?([^;"\n]*)', re.I|re.S)
- chrset = re.compile('^(content-type:.*charset=")(us-ascii|iso-8859-[0-9]+)(".*)', re.I|re.S)
- he = re.compile('^-*\n')
- mime_code = re.compile('=([0-9a-f][0-9a-f])', re.I)
- mime_head = re.compile('=\\?iso-8859-1\\?q\\?([^? \t\n]+)\\?=', re.I)
- repl = re.compile('^subject:\\s+re: ', re.I)
- class File:
- """A simple fake file object that knows about limited read-ahead and
- boundaries. The only supported method is readline()."""
- def __init__(self, file, boundary):
- self.file = file
- self.boundary = boundary
- self.peek = None
- def readline(self):
- if self.peek is not None:
- return ''
- line = self.file.readline()
- if not line:
- return line
- if self.boundary:
- if line == self.boundary + '\n':
- self.peek = line
- return ''
- if line == self.boundary + '--\n':
- self.peek = line
- return ''
- return line
- class HeaderFile:
- def __init__(self, file):
- self.file = file
- self.peek = None
- def readline(self):
- if self.peek is not None:
- line = self.peek
- self.peek = None
- else:
- line = self.file.readline()
- if not line:
- return line
- if he.match(line):
- return line
- while 1:
- self.peek = self.file.readline()
- if len(self.peek) == 0 or \
- (self.peek[0] != ' ' and self.peek[0] != '\t'):
- return line
- line = line + self.peek
- self.peek = None
- def mime_decode(line):
- """Decode a single line of quoted-printable text to 8bit."""
- newline = ''
- pos = 0
- while 1:
- res = mime_code.search(line, pos)
- if res is None:
- break
- newline = newline + line[pos:res.start(0)] + \
- chr(int(res.group(1), 16))
- pos = res.end(0)
- return newline + line[pos:]
- def mime_decode_header(line):
- """Decode a header line to 8bit."""
- newline = ''
- pos = 0
- while 1:
- res = mime_head.search(line, pos)
- if res is None:
- break
- match = res.group(1)
- # convert underscores to spaces (before =XX conversion!)
- match = ' '.join(match.split('_'))
- newline = newline + line[pos:res.start(0)] + mime_decode(match)
- pos = res.end(0)
- return newline + line[pos:]
- def unmimify_part(ifile, ofile, decode_base64 = 0):
- """Convert a quoted-printable part of a MIME mail message to 8bit."""
- multipart = None
- quoted_printable = 0
- is_base64 = 0
- is_repl = 0
- if ifile.boundary and ifile.boundary[:2] == QUOTE:
- prefix = QUOTE
- else:
- prefix = ''
- # read header
- hfile = HeaderFile(ifile)
- while 1:
- line = hfile.readline()
- if not line:
- return
- if prefix and line[:len(prefix)] == prefix:
- line = line[len(prefix):]
- pref = prefix
- else:
- pref = ''
- line = mime_decode_header(line)
- if qp.match(line):
- quoted_printable = 1
- continue # skip this header
- if decode_base64 and base64_re.match(line):
- is_base64 = 1
- continue
- ofile.write(pref + line)
- if not prefix and repl.match(line):
- # we're dealing with a reply message
- is_repl = 1
- mp_res = mp.match(line)
- if mp_res:
- multipart = '--' + mp_res.group(1)
- if he.match(line):
- break
- if is_repl and (quoted_printable or multipart):
- is_repl = 0
- # read body
- while 1:
- line = ifile.readline()
- if not line:
- return
- line = re.sub(mime_head, '\\1', line)
- if prefix and line[:len(prefix)] == prefix:
- line = line[len(prefix):]
- pref = prefix
- else:
- pref = ''
- ## if is_repl and len(line) >= 4 and line[:4] == QUOTE+'--' and line[-3:] != '--\n':
- ## multipart = line[:-1]
- while multipart:
- if line == multipart + '--\n':
- ofile.write(pref + line)
- multipart = None
- line = None
- break
- if line == multipart + '\n':
- ofile.write(pref + line)
- nifile = File(ifile, multipart)
- unmimify_part(nifile, ofile, decode_base64)
- line = nifile.peek
- if not line:
- # premature end of file
- break
- continue
- # not a boundary between parts
- break
- if line and quoted_printable:
- while line[-2:] == '=\n':
- line = line[:-2]
- newline = ifile.readline()
- if newline[:len(QUOTE)] == QUOTE:
- newline = newline[len(QUOTE):]
- line = line + newline
- line = mime_decode(line)
- if line and is_base64 and not pref:
- import base64
- line = base64.decodestring(line)
- if line:
- ofile.write(pref + line)
- def unmimify(infile, outfile, decode_base64 = 0):
- """Convert quoted-printable parts of a MIME mail message to 8bit."""
- if type(infile) == type(''):
- ifile = open(infile)
- if type(outfile) == type('') and infile == outfile:
- import os
- d, f = os.path.split(infile)
- os.rename(infile, os.path.join(d, ',' + f))
- else:
- ifile = infile
- if type(outfile) == type(''):
- ofile = open(outfile, 'w')
- else:
- ofile = outfile
- nifile = File(ifile, None)
- unmimify_part(nifile, ofile, decode_base64)
- ofile.flush()
- mime_char = re.compile('[=\177-\377]') # quote these chars in body
- mime_header_char = re.compile('[=?\177-\377]') # quote these in header
- def mime_encode(line, header):
- """Code a single line as quoted-printable.
- If header is set, quote some extra characters."""
- if header:
- reg = mime_header_char
- else:
- reg = mime_char
- newline = ''
- pos = 0
- if len(line) >= 5 and line[:5] == 'From ':
- # quote 'From ' at the start of a line for stupid mailers
- newline = ('=%02x' % ord('F')).upper()
- pos = 1
- while 1:
- res = reg.search(line, pos)
- if res is None:
- break
- newline = newline + line[pos:res.start(0)] + \
- ('=%02x' % ord(res.group(0))).upper()
- pos = res.end(0)
- line = newline + line[pos:]
- newline = ''
- while len(line) >= 75:
- i = 73
- while line[i] == '=' or line[i-1] == '=':
- i = i - 1
- i = i + 1
- newline = newline + line[:i] + '=\n'
- line = line[i:]
- return newline + line
- mime_header = re.compile('([ \t(]|^)([-a-zA-Z0-9_+]*[\177-\377][-a-zA-Z0-9_+\177-\377]*)(?=[ \t)]|\n)')
- def mime_encode_header(line):
- """Code a single header line as quoted-printable."""
- newline = ''
- pos = 0
- while 1:
- res = mime_header.search(line, pos)
- if res is None:
- break
- newline = '%s%s%s=?%s?Q?%s?=' % \
- (newline, line[pos:res.start(0)], res.group(1),
- CHARSET, mime_encode(res.group(2), 1))
- pos = res.end(0)
- return newline + line[pos:]
- mv = re.compile('^mime-version:', re.I)
- cte = re.compile('^content-transfer-encoding:', re.I)
- iso_char = re.compile('[\177-\377]')
- def mimify_part(ifile, ofile, is_mime):
- """Convert an 8bit part of a MIME mail message to quoted-printable."""
- has_cte = is_qp = is_base64 = 0
- multipart = None
- must_quote_body = must_quote_header = has_iso_chars = 0
- header = []
- header_end = ''
- message = []
- message_end = ''
- # read header
- hfile = HeaderFile(ifile)
- while 1:
- line = hfile.readline()
- if not line:
- break
- if not must_quote_header and iso_char.search(line):
- must_quote_header = 1
- if mv.match(line):
- is_mime = 1
- if cte.match(line):
- has_cte = 1
- if qp.match(line):
- is_qp = 1
- elif base64_re.match(line):
- is_base64 = 1
- mp_res = mp.match(line)
- if mp_res:
- multipart = '--' + mp_res.group(1)
- if he.match(line):
- header_end = line
- break
- header.append(line)
- # read body
- while 1:
- line = ifile.readline()
- if not line:
- break
- if multipart:
- if line == multipart + '--\n':
- message_end = line
- break
- if line == multipart + '\n':
- message_end = line
- break
- if is_base64:
- message.append(line)
- continue
- if is_qp:
- while line[-2:] == '=\n':
- line = line[:-2]
- newline = ifile.readline()
- if newline[:len(QUOTE)] == QUOTE:
- newline = newline[len(QUOTE):]
- line = line + newline
- line = mime_decode(line)
- message.append(line)
- if not has_iso_chars:
- if iso_char.search(line):
- has_iso_chars = must_quote_body = 1
- if not must_quote_body:
- if len(line) > MAXLEN:
- must_quote_body = 1
- # convert and output header and body
- for line in header:
- if must_quote_header:
- line = mime_encode_header(line)
- chrset_res = chrset.match(line)
- if chrset_res:
- if has_iso_chars:
- # change us-ascii into iso-8859-1
- if chrset_res.group(2).lower() == 'us-ascii':
- line = '%s%s%s' % (chrset_res.group(1),
- CHARSET,
- chrset_res.group(3))
- else:
- # change iso-8859-* into us-ascii
- line = '%sus-ascii%s' % chrset_res.group(1, 3)
- if has_cte and cte.match(line):
- line = 'Content-Transfer-Encoding: '
- if is_base64:
- line = line + 'base64\n'
- elif must_quote_body:
- line = line + 'quoted-printable\n'
- else:
- line = line + '7bit\n'
- ofile.write(line)
- if (must_quote_header or must_quote_body) and not is_mime:
- ofile.write('Mime-Version: 1.0\n')
- ofile.write('Content-Type: text/plain; ')
- if has_iso_chars:
- ofile.write('charset="%s"\n' % CHARSET)
- else:
- ofile.write('charset="us-ascii"\n')
- if must_quote_body and not has_cte:
- ofile.write('Content-Transfer-Encoding: quoted-printable\n')
- ofile.write(header_end)
- for line in message:
- if must_quote_body:
- line = mime_encode(line, 0)
- ofile.write(line)
- ofile.write(message_end)
- line = message_end
- while multipart:
- if line == multipart + '--\n':
- # read bit after the end of the last part
- while 1:
- line = ifile.readline()
- if not line:
- return
- if must_quote_body:
- line = mime_encode(line, 0)
- ofile.write(line)
- if line == multipart + '\n':
- nifile = File(ifile, multipart)
- mimify_part(nifile, ofile, 1)
- line = nifile.peek
- if not line:
- # premature end of file
- break
- ofile.write(line)
- continue
- # unexpectedly no multipart separator--copy rest of file
- while 1:
- line = ifile.readline()
- if not line:
- return
- if must_quote_body:
- line = mime_encode(line, 0)
- ofile.write(line)
- def mimify(infile, outfile):
- """Convert 8bit parts of a MIME mail message to quoted-printable."""
- if type(infile) == type(''):
- ifile = open(infile)
- if type(outfile) == type('') and infile == outfile:
- import os
- d, f = os.path.split(infile)
- os.rename(infile, os.path.join(d, ',' + f))
- else:
- ifile = infile
- if type(outfile) == type(''):
- ofile = open(outfile, 'w')
- else:
- ofile = outfile
- nifile = File(ifile, None)
- mimify_part(nifile, ofile, 0)
- ofile.flush()
- import sys
- if __name__ == '__main__' or (len(sys.argv) > 0 and sys.argv[0] == 'mimify'):
- import getopt
- usage = 'Usage: mimify [-l len] -[ed] [infile [outfile]]'
- decode_base64 = 0
- opts, args = getopt.getopt(sys.argv[1:], 'l:edb')
- if len(args) not in (0, 1, 2):
- print usage
- sys.exit(1)
- if (('-e', '') in opts) == (('-d', '') in opts) or \
- ((('-b', '') in opts) and (('-d', '') not in opts)):
- print usage
- sys.exit(1)
- for o, a in opts:
- if o == '-e':
- encode = mimify
- elif o == '-d':
- encode = unmimify
- elif o == '-l':
- try:
- MAXLEN = int(a)
- except (ValueError, OverflowError):
- print usage
- sys.exit(1)
- elif o == '-b':
- decode_base64 = 1
- if len(args) == 0:
- encode_args = (sys.stdin, sys.stdout)
- elif len(args) == 1:
- encode_args = (args[0], sys.stdout)
- else:
- encode_args = (args[0], args[1])
- if decode_base64:
- encode_args = encode_args + (decode_base64,)
- encode(*encode_args)
|