From 62490ff1835b22941d1775d1d4bb98d042df026b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rodolfo=20Garc=C3=ADa=20Pe=C3=B1as=20=28kix=29?= Date: Fri, 19 Feb 2021 16:39:17 +0100 Subject: [PATCH] Included charset detection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch includes charset detection to read the message. This patch is related to issue #43 Signed-off-by: Rodolfo García Peñas (kix) --- offlineimap/folder/Maildir.py | 15 ++++++++++++--- requirements.txt | 5 +++++ 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/offlineimap/folder/Maildir.py b/offlineimap/folder/Maildir.py index 205feea..0c44b60 100644 --- a/offlineimap/folder/Maildir.py +++ b/offlineimap/folder/Maildir.py @@ -20,9 +20,11 @@ import socket import time import re import os +from pathlib import Path from sys import exc_info from threading import Lock from hashlib import md5 +import chardet from offlineimap import OfflineImapError, emailutil from .Base import BaseFolder @@ -256,11 +258,18 @@ class MaildirFolder(BaseFolder): def getmessage(self, uid): """Return the content of the message.""" + # TODO: Perhaps, force the encoding using config file filename = self.messagelist[uid]['filename'] filepath = os.path.join(self.getfullname(), filename) - file = open(filepath, 'rt') - retval = file.read() - file.close() + # Open the file as binary and read it + file = Path(filepath) + blob = file.read_bytes() + # Detect the encoding + detection = chardet.detect(blob) + encoding = detection["encoding"] + # Read the file as text + retval = blob.decode(encoding) + # TODO: WHY are we replacing \r\n with \n here? And why do we # read it as text? return retval.replace("\r\n", "\n") diff --git a/requirements.txt b/requirements.txt index f543c51..19d3fff 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,3 +3,8 @@ gssapi[kerberos] portalocker[cygwin] rfc6555 distro + +imaplib2~=3.5 +urllib3~=1.25.9 +certifi~=2020.6.20 +chardet~=3.0.4 \ No newline at end of file