From 032376efadad56a52f4cd6ee55c216d80b464cca Mon Sep 17 00:00:00 2001 From: Urs Liska Date: Mon, 2 Oct 2017 01:06:18 +0200 Subject: [PATCH] utf8: Add imap4_utf_7 codec Add code to reencode IMAP folder names to regular utf-8. This starts an implementation that will add a new config option `utf8foldernames` at the account level, which will fix #299 and in the long run replace the current `decodefoldernames` option. This commit introduces code to register an `imap4_utf_7` codec on which two-way conversion methods will later be built. Original code by the blog's author (https://www.blogger.com/profile/16648963337079496096), taken from http://piao-tech.blogspot.no/2010/03/get-offlineimap-working-with-non-ascii.html In the comment http://piao-tech.blogspot.com/2010/03/get-offlineimap-working-with-non-ascii.html?showComment=1316041409339#c669880170006851138 the author indicates that this code is expected to be incorporated into offlineIMAP and therefore implicitly agrees to put it under this license. Signed-off-by: Urs Liska Signed-off-by: Nicolas Sebrecht --- offlineimap/imaputil.py | 72 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) diff --git a/offlineimap/imaputil.py b/offlineimap/imaputil.py index cc51531..1299cdd 100644 --- a/offlineimap/imaputil.py +++ b/offlineimap/imaputil.py @@ -17,6 +17,8 @@ import re import string +import binascii +import codecs from offlineimap.ui import getglobalui @@ -370,3 +372,73 @@ def decode_mailbox_name(name): return ret.decode('utf-7').encode('utf-8') except (UnicodeDecodeError, UnicodeEncodeError): return name + +# Functionality to convert folder names encoded in IMAP_utf_7 to utf_8. +# This is achieved by defining 'imap4_utf_7' as a proper encoding scheme.
try:
    _text_type = unicode  # Python 2
except NameError:
    _text_type = str  # Python 3: ``str`` already is the text type

# Spellings under which the codec search function may be called: Python 2
# and Python < 3.9 pass the hyphenated name, Python >= 3.9 normalizes
# hyphens to underscores before calling search functions.
_CODEC_NAMES = ('imap4-utf-7', 'imap4_utf_7')


def _as_text(s):
    """Return *s* unchanged if it is a string, else decode it as ASCII.

    Python 3 hands decoders a ``memoryview``/``bytes`` object whose items
    are integers; turning it into text lets the codec loops compare
    characters on both Python 2 and Python 3.
    """
    if isinstance(s, (str, _text_type)):
        return s
    return bytes(s).decode('ascii')


def modified_base64(s):
    """Encode text *s* as modified base64 (the payload of an ``&...-`` run).

    The text is serialized as UTF-16BE and base64-encoded; the trailing
    newline and ``=`` padding are stripped and ``/`` is replaced by ``,``.
    Returns a native ``str`` on both Python 2 and Python 3.
    """
    b64 = binascii.b2a_base64(s.encode('utf-16be'))
    if not isinstance(b64, str):
        # Python 3: b2a_base64() returns bytes; the alphabet is pure ASCII.
        b64 = b64.decode('ascii')
    return b64.rstrip('\n=').replace('/', ',')


def doB64(_in, r):
    """Flush pending characters *_in* into *r* as one ``&<base64>-`` run.

    Does nothing when *_in* is empty; otherwise appends the encoded run to
    the list *r* and empties *_in* in place.
    """
    if _in:
        r.append('&%s-' % modified_base64(''.join(_in)))
        del _in[:]


def encoder(s, errors='strict'):
    """Encode text *s* to IMAP4 modified UTF-7.

    Characters 0x20-0x7e are copied verbatim, except ``&`` which becomes
    ``&-``; every other character is collected and emitted as a
    ``&<modified base64>-`` run.

    *errors* is accepted so the function matches the codec API signature;
    it is ignored.

    Returns a ``(encoded_bytes, number_of_input_items_consumed)`` tuple.
    """
    text = _as_text(s)
    r = []
    _in = []
    for c in text:
        ordC = ord(c)
        if 0x20 <= ordC <= 0x25 or 0x27 <= ordC <= 0x7e:
            doB64(_in, r)
            r.append(c)
        elif c == '&':
            doB64(_in, r)
            r.append('&-')
        else:
            _in.append(c)
    doB64(_in, r)
    # Everything in r is ASCII at this point, so this yields ``str`` on
    # Python 2 and ``bytes`` on Python 3 (what str.encode() requires there).
    return (''.join(r).encode('ascii'), len(s))


# decoding
def modified_unbase64(s):
    """Decode the modified-base64 payload *s* of an ``&...-`` run to text.

    ``,`` is mapped back to ``/`` and the stripped ``=`` padding is
    restored before base64-decoding; the resulting bytes are decoded as
    UTF-16BE.
    """
    padding = '=' * (-len(s) % 4)
    b = binascii.a2b_base64(s.replace(',', '/') + padding)
    return b.decode('utf-16be')


def decoder(s, errors='strict'):
    """Decode IMAP4 modified UTF-7 data *s* to text.

    ``&-`` yields a literal ``&``; ``&<payload>-`` yields the decoded
    payload; everything outside such runs is copied verbatim. A run still
    open at the end of input is decoded as if it had been terminated.

    *errors* is accepted so the function matches the codec API signature;
    it is ignored.

    Returns a ``(decoded_text, number_of_input_items_consumed)`` tuple.
    """
    consumed = len(s)
    text = _as_text(s)
    r = []
    pending = None  # None: outside a run; list: payload chars seen since '&'
    for c in text:
        if pending is None:
            if c == '&':
                pending = []
            else:
                r.append(c)
        elif c == '-':
            r.append(modified_unbase64(''.join(pending)) if pending else '&')
            pending = None
        else:
            pending.append(c)

    if pending is not None:
        r.append(modified_unbase64(''.join(pending)))
    return (''.join(r), consumed)


class StreamReader(codecs.StreamReader):
    """Stream reader that decodes IMAP4 modified UTF-7."""

    def decode(self, s, errors='strict'):
        return decoder(s, errors)


class StreamWriter(codecs.StreamWriter):
    """Stream writer that encodes to IMAP4 modified UTF-7."""

    # codecs.StreamWriter.write() calls self.encode(), so the override has
    # to be named ``encode``.
    def encode(self, s, errors='strict'):
        return encoder(s, errors)


def imap4_utf_7(name):
    """Codec search function for ``codecs.register()``.

    Returns the codec description when *name* is ``'imap4-utf-7'`` or
    ``'imap4_utf_7'`` and ``None`` for any other name.
    """
    if name in _CODEC_NAMES:
        # CodecInfo is a 4-tuple subclass, so tuple-style consumers keep
        # working, while codecs.getreader()/getwriter() get the attributes
        # they look up.
        return codecs.CodecInfo(
            encode=encoder,
            decode=decoder,
            streamreader=StreamReader,
            streamwriter=StreamWriter,
            name='imap4-utf-7',
        )
    return None


codecs.register(imap4_utf_7)