Merge pull request #53 from thekix/master

Included charset detection
This commit is contained in:
Rodolfo García Peñas (kix) 2021-02-19 16:43:02 +01:00 committed by GitHub
commit 9e8e30794c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 17 additions and 3 deletions

View File

@ -20,9 +20,11 @@ import socket
import time import time
import re import re
import os import os
from pathlib import Path
from sys import exc_info from sys import exc_info
from threading import Lock from threading import Lock
from hashlib import md5 from hashlib import md5
import chardet
from offlineimap import OfflineImapError, emailutil from offlineimap import OfflineImapError, emailutil
from .Base import BaseFolder from .Base import BaseFolder
@ -256,11 +258,18 @@ class MaildirFolder(BaseFolder):
def getmessage(self, uid):
    """Return the content of the message as text.

    The message file is located by joining the folder's full name
    (``self.getfullname()``) with the filename recorded for *uid* in
    ``self.messagelist``.  The file is read as raw bytes, its character
    encoding is guessed with ``chardet``, and the decoded text is
    returned with every ``\\r\\n`` replaced by ``\\n``.

    :param uid: key into ``self.messagelist`` identifying the message.
    :returns: the decoded message content with ``\\n`` line endings.
    :raises KeyError: if *uid* is not present in ``self.messagelist``.
    :raises OSError: if the message file cannot be read.
    :raises UnicodeDecodeError: if the bytes are not valid in the
        selected encoding.
    """
    # TODO: Perhaps, force the encoding using config file
    filename = self.messagelist[uid]['filename']
    filepath = os.path.join(self.getfullname(), filename)
    # Read the file as binary so that no implicit (locale-dependent)
    # decoding happens before the charset has been detected.
    blob = Path(filepath).read_bytes()
    # Detect the encoding.  chardet reports None as the encoding when it
    # cannot classify the input (for example an empty file); passing
    # None to bytes.decode() would raise TypeError, so fall back to
    # UTF-8 in that case.
    detection = chardet.detect(blob)
    encoding = detection["encoding"] or "utf-8"
    # Decode strictly: a wrong guess must surface as an error rather
    # than silently altering the message content.
    retval = blob.decode(encoding)
    # TODO: WHY are we replacing \r\n with \n here? And why do we
    # read it as text?
    return retval.replace("\r\n", "\n")

View File

@ -3,3 +3,8 @@ gssapi[kerberos]
portalocker[cygwin] portalocker[cygwin]
rfc6555 rfc6555
distro distro
imaplib2~=3.5
urllib3~=1.25.9
certifi~=2020.6.20
chardet~=3.0.4