From: Karl Hasselström <kha@xxxxxxxxxxx>

Having non-ascii characters in email headers is illegal, but StGIT
currently does not care. I'm often bitten by this, since my name
doesn't fit in ascii.

This patch implements an encoding pass just before the email is sent
over the wire -- in particular, it comes after any interactive editing
and templates and such, so the user should never have to see the
rfc2047 encoding.

NOTE: The rfc2047 encoder needs to know the encoding of the input
string. This patch hard-codes this to utf8, since that should be by
far the most common non-ascii encoding, and since utf8 is already the
hardcoded character set for the email body. In the long run, we
probably want to get this from the locale, or from a command line
switch, or both.

Signed-off-by: Karl Hasselström <kha@xxxxxxxxxxx>
---

 stgit/commands/mail.py |   45 +++++++++++++++++++++++++++++++++++++++++----
 1 files changed, 41 insertions(+), 4 deletions(-)

diff --git a/stgit/commands/mail.py b/stgit/commands/mail.py
index 34504e6..b661308 100644
--- a/stgit/commands/mail.py
+++ b/stgit/commands/mail.py
@@ -15,7 +15,7 @@ along with this program; if not, write t
 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 """
 
-import sys, os, re, time, datetime, smtplib, email.Utils
+import sys, os, re, time, datetime, smtplib, email.Header, email.Utils
 from optparse import OptionParser, make_option
 
 from stgit.commands.common import *
@@ -403,6 +403,42 @@ def __build_message(tmpl, patch, patch_n
 
     return msg.strip('\n')
 
+def encode_header(s, enc):
+    """Take an entire e-mail header line, encoded in enc, and
+    rfc2047-encode it."""
+    def trans(s):
+        return str(email.Header.Header(unicode(s, enc)))
+    words = s.split(' ')
+    first_encode = len(words)
+    last_encode = -1
+    for i in xrange(len(words)):
+        ew = trans(words[i])
+        if ew != words[i]:
+            first_encode = min(first_encode, i)
+            last_encode = max(last_encode, i)
+    if first_encode <= last_encode:
+        return ' '.join(filter(
+            None,
+            [' '.join(words[:first_encode]),
+             trans(' '.join(words[first_encode:last_encode+1])),
+             ' '.join(words[last_encode+1:])]))
+    else:
+        return s
+
+def encode_headers(msg, enc):
+    """rfc2047-encode the headers of msg, assuming it is encoded in
+    enc."""
+    in_header = True
+    lines = []
+    for line in msg.splitlines(True):
+        if in_header:
+            if line.strip():
+                line = encode_header(line, enc)
+            else:
+                in_header = False
+        lines.append(line)
+    return ''.join(lines)
+
 def func(parser, options, args):
     """Send the patches by e-mail using the patchmail.tmpl file as
     a template
@@ -461,7 +497,8 @@ def func(parser, options, args):
                 raise CmdException, 'No cover message template file found'
 
         msg_id = email.Utils.make_msgid('stgit')
-        msg = __build_cover(tmpl, total_nr, msg_id, options)
+        msg = encode_headers(__build_cover(tmpl, total_nr, msg_id, options),
+                             'UTF-8')
         from_addr, to_addr_list = __parse_addresses(msg)
 
         # subsequent e-mails are seen as replies to the first one
@@ -487,8 +524,8 @@ def func(parser, options, args):
 
     for (p, patch_nr) in zip(patches, range(1, len(patches) + 1)):
         msg_id = email.Utils.make_msgid('stgit')
-        msg = __build_message(tmpl, p, patch_nr, total_nr, msg_id, ref_id,
-                              options)
+        msg = encode_headers(__build_message(tmpl, p, patch_nr, total_nr,
+                                             msg_id, ref_id, options), 'UTF-8')
         from_addr, to_addr_list = __parse_addresses(msg)
 
         # subsequent e-mails are seen as replies to the first one
-
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html