Included charset detection

This patch detects the character set of a message file before decoding it, instead of relying on the default text encoding.

This patch is related to issue #43

Signed-off-by: Rodolfo García Peñas (kix) <kix@kix.es>
This commit is contained in:
Rodolfo García Peñas (kix) 2021-02-19 16:39:17 +01:00
parent 76c7a723db
commit 62490ff183
2 changed files with 17 additions and 3 deletions

View File

@ -20,9 +20,11 @@ import socket
import time
import re
import os
from pathlib import Path
from sys import exc_info
from threading import Lock
from hashlib import md5
import chardet
from offlineimap import OfflineImapError, emailutil
from .Base import BaseFolder
@ -256,11 +258,18 @@ class MaildirFolder(BaseFolder):
def getmessage(self, uid):
    """Return the content of the message as text.

    The message file is read once as raw bytes, its character set is
    guessed with chardet, and the bytes are decoded with that charset.
    CRLF line endings are converted to LF in the returned string.

    :param uid: key into ``self.messagelist`` identifying the message.
    :returns: the decoded message text with CRLF replaced by LF.
    :raises UnicodeDecodeError: if the bytes are not valid in the
        encoding that was selected.
    """
    # TODO: Perhaps, force the encoding using config file
    filename = self.messagelist[uid]['filename']
    filepath = os.path.join(self.getfullname(), filename)
    # Read the raw bytes only. A text-mode read would decode with the
    # default codec and could fail before detection gets a chance to run.
    blob = Path(filepath).read_bytes()
    # chardet reports None as the encoding when it cannot guess one
    # (for example on an empty file); bytes.decode(None) would raise
    # TypeError, so fall back to UTF-8 in that case.
    encoding = chardet.detect(blob)["encoding"] or "utf-8"
    retval = blob.decode(encoding)
    # TODO: WHY are we replacing \r\n with \n here? And why do we
    # read it as text?
    return retval.replace("\r\n", "\n")

View File

@ -3,3 +3,8 @@ gssapi[kerberos]
portalocker[cygwin]
rfc6555
distro
imaplib2~=3.5
urllib3~=1.25.9
certifi~=2020.6.20
chardet~=3.0.4