Included charset detection

This patch adds charset detection when reading a message: the file is read as bytes and decoded with the encoding detected by chardet.

This patch is related to issue #43

Signed-off-by: Rodolfo García Peñas (kix) <kix@kix.es>
This commit is contained in:
Rodolfo García Peñas (kix)
2021-02-19 16:39:17 +01:00
parent 76c7a723db
commit 62490ff183
2 changed files with 17 additions and 3 deletions

View File

@ -20,9 +20,11 @@ import socket
import time
import re
import os
from pathlib import Path
from sys import exc_info
from threading import Lock
from hashlib import md5
import chardet
from offlineimap import OfflineImapError, emailutil
from .Base import BaseFolder
@ -256,11 +258,18 @@ class MaildirFolder(BaseFolder):
def getmessage(self, uid):
    """Return the content of the message as text.

    The message file is read once as raw bytes, its character set is
    guessed with chardet, and the bytes are decoded using that guess.
    CRLF line endings in the decoded text are converted to LF.

    :param uid: key into ``self.messagelist`` identifying the message.
    :returns: the decoded message text.
    :raises UnicodeDecodeError: if the bytes cannot be decoded with the
        selected encoding.
    """
    # TODO: Perhaps, force the encoding using config file
    filename = self.messagelist[uid]['filename']
    filepath = os.path.join(self.getfullname(), filename)

    # Read the file exactly once, as binary. Opening it in text mode
    # first would decode with the locale's default encoding and could
    # fail before the charset detection gets a chance to run.
    blob = Path(filepath).read_bytes()

    # Detect the encoding. chardet reports None when it cannot make a
    # guess (e.g. an empty file); fall back to UTF-8 in that case so
    # that bytes.decode() is never called with encoding=None.
    detection = chardet.detect(blob)
    encoding = detection["encoding"] or "utf-8"

    # Decode the raw bytes into text.
    retval = blob.decode(encoding)

    # TODO: WHY are we replacing \r\n with \n here? And why do we
    #       read it as text?
    return retval.replace("\r\n", "\n")