Fix parsing of quoted strings

When imaputil parsed quoted strings, it treated "abcd\\"
as an incomplete quoted string containing an escaped quote, rather
than as a properly quoted string ending in an escaped backslash.

GitHub issue: https://github.com/OfflineIMAP/offlineimap/issues/53
Signed-off-by: Eygene Ryabinkin <rea@codelabs.ru>
This commit is contained in:
Eygene Ryabinkin 2013-09-19 22:56:55 +04:00
parent be1c72ea5f
commit 57adfc23a5
2 changed files with 45 additions and 14 deletions

View File

@ -36,6 +36,9 @@ WIP (add new stuff for the next release)
* Updated bundled imaplib2 to 2.36: it includes support for SSL * Updated bundled imaplib2 to 2.36: it includes support for SSL
version override that was integrated into our code before, version override that was integrated into our code before,
no other changes. no other changes.
* Fixed parsing of quoted strings in IMAP responses: strings like "\\"
were treated as containing an escaped quote (\") rather than as
a quoted, escaped backslash (GitHub#53).
OfflineIMAP v6.5.5-rc1 (2012-09-05) OfflineIMAP v6.5.5-rc1 (2012-09-05)
=================================== ===================================

View File

@ -21,13 +21,6 @@ import string
from offlineimap.ui import getglobalui from offlineimap.ui import getglobalui
# find the first quote in a string
quotere = re.compile(
r"""(?P<quote>"[^\"\\]*(?:\\"|[^"])*") # Quote, possibly containing encoded
# quotation mark
\s*(?P<rest>.*)$ # Whitespace & remainder of string""",
re.VERBOSE)
def debug(*args): def debug(*args):
msg = [] msg = []
for arg in args: for arg in args:
@ -144,13 +137,9 @@ def imapsplit(imapstring):
retval.append(parenlist) retval.append(parenlist)
elif workstr[0] == '"': elif workstr[0] == '"':
# quoted fragments '"...\"..."' # quoted fragments '"...\"..."'
m = quotere.match(workstr) (quoted, rest) = _split_quoted(workstr)
if not m: retval.append(quoted)
raise ValueError ("failed to parse " workstr = rest
"quoted component %s " % str(workstr) + \
"while working with %s" % str(imapstring))
retval.append(m.group('quote'))
workstr = m.group('rest')
else: else:
splits = string.split(workstr, maxsplit = 1) splits = string.split(workstr, maxsplit = 1)
splitslen = len(splits) splitslen = len(splits)
@ -222,3 +211,42 @@ def uid_sequence(uidlist):
retval.append(getrange(start, end)) # Add final range/item retval.append(getrange(start, end)) # Add final range/item
return ",".join(retval) return ",".join(retval)
def _split_quoted(string):
"""
Looks for the ending quote character in the string that starts
with quote character, splitting out quoted component and the
rest of the string (without possible space between these two
parts.
First character of the string is taken to be quote character.
Examples:
- "this is \" a test" (\\None) => ("this is \" a test", (\\None))
- "\\" => ("\\", )
"""
if len(string) == 0:
return ('', '')
q = quoted = string[0]
rest = string[1:]
while True:
next_q = rest.find(q)
if next_q == -1:
raise ValueError("can't find ending quote '%s' in '%s'" % (q, string))
# If quote is preceeded by even number of backslashes,
# then it is the ending quote, otherwise the quote
# character is escaped by backslash, so we should
# continue our search.
is_escaped = False
i = next_q - 1
while i >= 0 and rest[i] == '\\':
i -= 1
is_escaped = not is_escaped
quoted += rest[0:next_q + 1]
rest = rest[next_q + 1:]
if not is_escaped:
return (quoted, rest.lstrip())