Improve handling of XML requests and responses

* Move parsing/serialization of XML requests/responses from ``xmlutils.py`` to ``__init__.py``.
  * Log XML requests/responses in pretty-printed form.
      * Previously only the responses were logged in readable form. This is useful for debugging.
      * The XML documents are only converted for pretty-printing if debugging is enabled (it's expensive)
  * Send XML responses in minimized form to clients.
  * Add **encoding** attribute to XML declaration in XML response.
  * Only decode XML requests once. (Previously they were decoded, encoded and decoded again.)
This commit is contained in:
Unrud 2017-05-07 08:17:39 +02:00
parent f1a9cf7694
commit 11c5dfdb53
2 changed files with 85 additions and 51 deletions

View File

@ -31,6 +31,7 @@ import contextlib
import datetime import datetime
import io import io
import itertools import itertools
import logging
import os import os
import posixpath import posixpath
import pprint import pprint
@ -46,6 +47,7 @@ import wsgiref.simple_server
import zlib import zlib
from http import client from http import client
from urllib.parse import unquote, urlparse from urllib.parse import unquote, urlparse
from xml.etree import ElementTree as ET
import vobject import vobject
@ -463,16 +465,43 @@ class Application:
allowed |= self.authorized(user, parent_path, permission) allowed |= self.authorized(user, parent_path, permission)
return allowed return allowed
def _read_content(self, environ): def _read_raw_content(self, environ):
content_length = int(environ.get("CONTENT_LENGTH") or 0) content_length = int(environ.get("CONTENT_LENGTH") or 0)
if content_length > 0: if not content_length:
content = self.decode( return b""
environ["wsgi.input"].read(content_length), environ) content = environ["wsgi.input"].read(content_length)
self.logger.debug("Request content:\n%s", content.strip()) if len(content) < content_length:
else: raise ValueError("Request body too short: %d" % len(content))
content = None
return content return content
def _read_content(self, environ):
content = self.decode(self._read_raw_content(environ), environ)
self.logger.debug("Request content:\n%s", content)
return content
def _read_xml_content(self, environ):
content = self.decode(self._read_raw_content(environ), environ)
if not content:
return None
try:
xml_content = ET.fromstring(content)
except ET.ParseError:
self.logger.debug("Request content (Invalid XML):\n%s", content)
raise
if self.logger.isEnabledFor(logging.DEBUG):
self.logger.debug("Request content:\n%s",
xmlutils.pretty_xml(xml_content))
return xml_content
def _write_xml_content(self, xml_content):
if self.logger.isEnabledFor(logging.DEBUG):
self.logger.debug("Response content:\n%s",
xmlutils.pretty_xml(xml_content))
f = io.BytesIO()
ET.ElementTree(xml_content).write(f, encoding=self.encoding,
xml_declaration=True)
return f.getvalue()
def do_DELETE(self, environ, base_prefix, path, user): def do_DELETE(self, environ, base_prefix, path, user):
"""Manage DELETE request.""" """Manage DELETE request."""
if not self._access(user, path, "w"): if not self._access(user, path, "w"):
@ -488,11 +517,12 @@ class Application:
# ETag precondition not verified, do not delete item # ETag precondition not verified, do not delete item
return PRECONDITION_FAILED return PRECONDITION_FAILED
if isinstance(item, self.Collection): if isinstance(item, self.Collection):
answer = xmlutils.delete(base_prefix, path, item) xml_answer = xmlutils.delete(base_prefix, path, item)
else: else:
answer = xmlutils.delete( xml_answer = xmlutils.delete(
base_prefix, path, item.collection, item.href) base_prefix, path, item.collection, item.href)
return client.OK, {"Content-Type": "text/xml"}, answer headers = {"Content-Type": "text/xml; charset=%s" % self.encoding}
return client.OK, headers, self._write_xml_content(xml_answer)
def do_GET(self, environ, base_prefix, path, user): def do_GET(self, environ, base_prefix, path, user):
"""Manage GET request.""" """Manage GET request."""
@ -533,12 +563,12 @@ class Application:
"""Manage MKCALENDAR request.""" """Manage MKCALENDAR request."""
if not self.authorized(user, path, "w"): if not self.authorized(user, path, "w"):
return NOT_ALLOWED return NOT_ALLOWED
content = self._read_content(environ) xml_content = self._read_xml_content(environ)
with self.Collection.acquire_lock("w", user): with self.Collection.acquire_lock("w", user):
item = next(self.Collection.discover(path), None) item = next(self.Collection.discover(path), None)
if item: if item:
return WEBDAV_PRECONDITION_FAILED return WEBDAV_PRECONDITION_FAILED
props = xmlutils.props_from_request(content) props = xmlutils.props_from_request(xml_content)
props["tag"] = "VCALENDAR" props["tag"] = "VCALENDAR"
# TODO: use this? # TODO: use this?
# timezone = props.get("C:calendar-timezone") # timezone = props.get("C:calendar-timezone")
@ -549,12 +579,12 @@ class Application:
"""Manage MKCOL request.""" """Manage MKCOL request."""
if not self.authorized(user, path, "w"): if not self.authorized(user, path, "w"):
return NOT_ALLOWED return NOT_ALLOWED
content = self._read_content(environ) xml_content = self._read_xml_content(environ)
with self.Collection.acquire_lock("w", user): with self.Collection.acquire_lock("w", user):
item = next(self.Collection.discover(path), None) item = next(self.Collection.discover(path), None)
if item: if item:
return WEBDAV_PRECONDITION_FAILED return WEBDAV_PRECONDITION_FAILED
props = xmlutils.props_from_request(content) props = xmlutils.props_from_request(xml_content)
self.Collection.create_collection(path, props=props) self.Collection.create_collection(path, props=props)
return client.CREATED, {}, None return client.CREATED, {}, None
@ -607,7 +637,7 @@ class Application:
"""Manage PROPFIND request.""" """Manage PROPFIND request."""
if not self._access(user, path, "r"): if not self._access(user, path, "r"):
return NOT_ALLOWED return NOT_ALLOWED
content = self._read_content(environ) xml_content = self._read_xml_content(environ)
with self.Collection.acquire_lock("r", user): with self.Collection.acquire_lock("r", user):
items = self.Collection.discover( items = self.Collection.discover(
path, environ.get("HTTP_DEPTH", "0")) path, environ.get("HTTP_DEPTH", "0"))
@ -620,26 +650,30 @@ class Application:
# put item back # put item back
items = itertools.chain([item], items) items = itertools.chain([item], items)
read_items, write_items = self.collect_allowed_items(items, user) read_items, write_items = self.collect_allowed_items(items, user)
headers = {"DAV": DAV_HEADERS, "Content-Type": "text/xml"} headers = {"DAV": DAV_HEADERS,
status, answer = xmlutils.propfind( "Content-Type": "text/xml; charset=%s" % self.encoding}
base_prefix, path, content, read_items, write_items, user) status, xml_answer = xmlutils.propfind(
base_prefix, path, xml_content, read_items, write_items, user)
if status == client.FORBIDDEN: if status == client.FORBIDDEN:
return NOT_ALLOWED return NOT_ALLOWED
else: else:
return status, headers, answer return status, headers, self._write_xml_content(xml_answer)
def do_PROPPATCH(self, environ, base_prefix, path, user): def do_PROPPATCH(self, environ, base_prefix, path, user):
"""Manage PROPPATCH request.""" """Manage PROPPATCH request."""
if not self.authorized(user, path, "w"): if not self.authorized(user, path, "w"):
return NOT_ALLOWED return NOT_ALLOWED
content = self._read_content(environ) xml_content = self._read_xml_content(environ)
with self.Collection.acquire_lock("w", user): with self.Collection.acquire_lock("w", user):
item = next(self.Collection.discover(path), None) item = next(self.Collection.discover(path), None)
if not isinstance(item, self.Collection): if not isinstance(item, self.Collection):
return WEBDAV_PRECONDITION_FAILED return WEBDAV_PRECONDITION_FAILED
headers = {"DAV": DAV_HEADERS, "Content-Type": "text/xml"} headers = {"DAV": DAV_HEADERS,
answer = xmlutils.proppatch(base_prefix, path, content, item) "Content-Type": "text/xml; charset=%s" % self.encoding}
return client.MULTI_STATUS, headers, answer xml_answer = xmlutils.proppatch(base_prefix, path, xml_content,
item)
return (client.MULTI_STATUS, headers,
self._write_xml_content(xml_answer))
def do_PUT(self, environ, base_prefix, path, user): def do_PUT(self, environ, base_prefix, path, user):
"""Manage PUT request.""" """Manage PUT request."""
@ -697,7 +731,7 @@ class Application:
"""Manage REPORT request.""" """Manage REPORT request."""
if not self._access(user, path, "r"): if not self._access(user, path, "r"):
return NOT_ALLOWED return NOT_ALLOWED
content = self._read_content(environ) xml_content = self._read_xml_content(environ)
with self.Collection.acquire_lock("r", user): with self.Collection.acquire_lock("r", user):
item = next(self.Collection.discover(path), None) item = next(self.Collection.discover(path), None)
if not self._access(user, path, "r", item): if not self._access(user, path, "r", item):
@ -708,6 +742,8 @@ class Application:
collection = item collection = item
else: else:
collection = item.collection collection = item.collection
headers = {"Content-Type": "text/xml"} headers = {"Content-Type": "text/xml; charset=%s" % self.encoding}
answer = xmlutils.report(base_prefix, path, content, collection) xml_answer = xmlutils.report(
return client.MULTI_STATUS, headers, answer base_prefix, path, xml_content, collection)
return (client.MULTI_STATUS, headers,
self._write_xml_content(xml_answer))

View File

@ -25,6 +25,7 @@ in them for XML requests (all but PUT).
""" """
import copy
import posixpath import posixpath
import re import re
import xml.etree.ElementTree as ET import xml.etree.ElementTree as ET
@ -56,8 +57,10 @@ CLARK_TAG_REGEX = re.compile(r"{(?P<namespace>[^}]*)}(?P<tag>.*)", re.VERBOSE)
HUMAN_REGEX = re.compile(r"(?P<namespace>[^:{}]*)(?P<tag>.*)", re.VERBOSE) HUMAN_REGEX = re.compile(r"(?P<namespace>[^:{}]*)(?P<tag>.*)", re.VERBOSE)
def _pretty_xml(element, level=0): def pretty_xml(element, level=0):
"""Indent an ElementTree ``element`` and its children.""" """Indent an ElementTree ``element`` and its children."""
if not level:
element = copy.deepcopy(element)
i = "\n" + level * " " i = "\n" + level * " "
if len(element): if len(element):
if not element.text or not element.text.strip(): if not element.text or not element.text.strip():
@ -65,7 +68,7 @@ def _pretty_xml(element, level=0):
if not element.tail or not element.tail.strip(): if not element.tail or not element.tail.strip():
element.tail = i element.tail = i
for sub_element in element: for sub_element in element:
_pretty_xml(sub_element, level + 1) pretty_xml(sub_element, level + 1)
if not sub_element.tail or not sub_element.tail.strip(): if not sub_element.tail or not sub_element.tail.strip():
sub_element.tail = i sub_element.tail = i
else: else:
@ -439,21 +442,18 @@ def name_from_path(path, collection):
return name return name
def props_from_request(root, actions=("set", "remove")): def props_from_request(xml_request, actions=("set", "remove")):
"""Return a list of properties as a dictionary.""" """Return a list of properties as a dictionary."""
result = OrderedDict() result = OrderedDict()
if root: if xml_request is None:
if not hasattr(root, "tag"):
root = ET.fromstring(root.encode("utf8"))
else:
return result return result
for action in actions: for action in actions:
action_element = root.find(_tag("D", action)) action_element = xml_request.find(_tag("D", action))
if action_element is not None: if action_element is not None:
break break
else: else:
action_element = root action_element = xml_request
prop_element = action_element.find(_tag("D", "prop")) prop_element = action_element.find(_tag("D", "prop"))
if prop_element is not None: if prop_element is not None:
@ -497,7 +497,7 @@ def delete(base_prefix, path, collection, href=None):
status.text = _response(200) status.text = _response(200)
response.append(status) response.append(status)
return _pretty_xml(multistatus) return multistatus
def propfind(base_prefix, path, xml_request, read_collections, def propfind(base_prefix, path, xml_request, read_collections,
@ -510,12 +510,10 @@ def propfind(base_prefix, path, xml_request, read_collections,
in the output. in the output.
""" """
# Reading request
root = ET.fromstring(xml_request.encode("utf8")) if xml_request else None
# A client may choose not to submit a request body. An empty PROPFIND # A client may choose not to submit a request body. An empty PROPFIND
# request body MUST be treated as if it were an 'allprop' request. # request body MUST be treated as if it were an 'allprop' request.
top_tag = root[0] if root is not None else ET.Element(_tag("D", "allprop")) top_tag = (xml_request[0] if xml_request is not None else
ET.Element(_tag("D", "allprop")))
props = () props = ()
if top_tag.tag == _tag("D", "allprop"): if top_tag.tag == _tag("D", "allprop"):
@ -567,7 +565,7 @@ def propfind(base_prefix, path, xml_request, read_collections,
if response: if response:
multistatus.append(response) multistatus.append(response)
return client.MULTI_STATUS, _pretty_xml(multistatus) return client.MULTI_STATUS, multistatus
def _propfind_response(base_prefix, path, item, props, user, write=False, def _propfind_response(base_prefix, path, item, props, user, write=False,
@ -802,9 +800,8 @@ def proppatch(base_prefix, path, xml_request, collection):
Read rfc4918-9.2 for info. Read rfc4918-9.2 for info.
""" """
root = ET.fromstring(xml_request.encode("utf8")) props_to_set = props_from_request(xml_request, actions=("set",))
props_to_set = props_from_request(root, actions=("set",)) props_to_remove = props_from_request(xml_request, actions=("remove",))
props_to_remove = props_from_request(root, actions=("remove",))
multistatus = ET.Element(_tag("D", "multistatus")) multistatus = ET.Element(_tag("D", "multistatus"))
response = ET.Element(_tag("D", "response")) response = ET.Element(_tag("D", "response"))
@ -821,7 +818,7 @@ def proppatch(base_prefix, path, xml_request, collection):
for short_name in props_to_set: for short_name in props_to_set:
_add_propstat_to(response, short_name, 200) _add_propstat_to(response, short_name, 200)
return _pretty_xml(multistatus) return multistatus
def report(base_prefix, path, xml_request, collection): def report(base_prefix, path, xml_request, collection):
@ -830,7 +827,10 @@ def report(base_prefix, path, xml_request, collection):
Read rfc3253-3.6 for info. Read rfc3253-3.6 for info.
""" """
root = ET.fromstring(xml_request.encode("utf8")) multistatus = ET.Element(_tag("D", "multistatus"))
if xml_request is None:
return multistatus
root = xml_request
if root.tag in ( if root.tag in (
_tag("D", "principal-search-property-set"), _tag("D", "principal-search-property-set"),
_tag("D", "principal-property-search"), _tag("D", "principal-property-search"),
@ -840,7 +840,7 @@ def report(base_prefix, path, xml_request, collection):
# InfCloud asks for expand-property reports (even if we don't announce # InfCloud asks for expand-property reports (even if we don't announce
# support for them) and stops working if an error code is returned. # support for them) and stops working if an error code is returned.
collection.logger.warning("Unsupported report method: %s", root.tag) collection.logger.warning("Unsupported report method: %s", root.tag)
return _pretty_xml(ET.Element(_tag("D", "multistatus"))) return multistatus
prop_element = root.find(_tag("D", "prop")) prop_element = root.find(_tag("D", "prop"))
props = ( props = (
[prop.tag for prop in prop_element] [prop.tag for prop in prop_element]
@ -865,8 +865,6 @@ def report(base_prefix, path, xml_request, collection):
root.findall("./%s" % _tag("C", "filter")) + root.findall("./%s" % _tag("C", "filter")) +
root.findall("./%s" % _tag("CR", "filter"))) root.findall("./%s" % _tag("CR", "filter")))
multistatus = ET.Element(_tag("D", "multistatus"))
for hreference in hreferences: for hreference in hreferences:
try: try:
name = name_from_path(hreference, collection) name = name_from_path(hreference, collection)
@ -927,7 +925,7 @@ def report(base_prefix, path, xml_request, collection):
base_prefix, uri, found_props=found_props, base_prefix, uri, found_props=found_props,
not_found_props=not_found_props, found_item=True)) not_found_props=not_found_props, found_item=True))
return _pretty_xml(multistatus) return multistatus
def _item_response(base_prefix, href, found_props=(), not_found_props=(), def _item_response(base_prefix, href, found_props=(), not_found_props=(),