Make Radicale fast (#569)

* Change get_multi to also return missing items
  get_multi is not used anywhere and this makes it easier to use.
* Use get_multi for report requests
* Add get_all to BaseCollection
  This can be used for optimization on multifilesystem.
* Use iterator for files
* Remove unnecessary checks
  This never happens and would be an error.
* Don't raise exception when calling get with colliding name
  This behavior is wrong, it should be handled as if the file doesn't exist.
* Use get_all and get_multi to skip unnecessary checks
  Collision checks are slow on big collections.
* Use exception instead of existence checks
  It's a bit faster.
* Use os.scandir instead of os.listdir
  It's faster and doesn't load all files at once.
* Cache metadata when storage is read-only
  Metadata is queried a lot during a request. It's quite slow to load and parse the file every time.
* Cache the etag when the storage is read-only
  The etag is calculated twice for GET requests on collections.
* Add helper method for cleaning caches
* Use item etags to calculate collection etag
  It's very slow and unnecessary to parse all files with VObject and serialize them again.
* Cache serialized collections in file system
  Serialization is very slow for big collections. This caches the result in a file.
* Add helper function for prefilters
  The simplify_prefilters function converts XML filters to a simple tag and time range, which can be easily matched against the tag and time range that are extracted from vobject_items by the function find_tag_and_time_range.
* Add ability to cache etag and serialization of item
  Parsing items with vobject is very slow and not required for many requests. Caching can be used to speed it up.
* Cache metadata and serialization from items in file system
  Store the serialized text and the tag and time range from vobject_items in the cache. The metadata is used for prefilters.
* Remove the cache for the serialization of collections
* Serialize calendars without vobject
  Merge the calendar components manually. This is much faster and requires less memory. Caching of the result is not required anymore.
* Allow pre_filtered_list to indicate that filters match
  The storage backend can indicate that it evaluated the filters completely.
* Skip filtering with vobject if prefiltering is sufficient
  ``simplify_prefilters`` indicates if the simplified condition is identical to ``filters``. This is used in the multifilesystem backend to detect if prefiltering is sufficient.
* Make constants global
* Use generator expressions
* Only extract elements from inside of VCALENDAR
  This is unnecessary at the moment, the text representation should never contain anything but VCALENDAR.
* Improve comments
* Restore backward compatibility
* Small improvements for fastbackend
2017-06-02 14:14:55 +02:00
parent 78a62aee86
commit 9ceae0a751
2 changed files with 602 additions and 187 deletions
--- a/radicale/storage.py
+++ b/radicale/storage.py
@@ -27,7 +27,6 @@ entry.
 import binascii
 import contextlib
 import datetime
 import errno
 import json
 import os
@@ -36,6 +35,7 @@ import posixpath
 import shlex
 import stat
 import subprocess
 import sys
 import threading
 import time
 from contextlib import contextmanager
@@ -47,6 +47,10 @@ from tempfile import NamedTemporaryFile, TemporaryDirectory
 import vobject
 if sys.version_info >= (3, 5):
    # HACK: Avoid import cycle for Python < 3.5
    from . import xmlutils
 if os.name == "nt":
    import ctypes
    import ctypes.wintypes
@@ -89,6 +93,10 @@ elif os.name == "posix":
 def load(configuration, logger):
    """Load the storage manager chosen in configuration."""
    if sys.version_info < (3, 5):
        # HACK: Avoid import cycle for Python < 3.5
        global xmlutils
        from . import xmlutils
    storage_type = configuration.get("storage", "type")
    if storage_type == "multifilesystem":
        collection_class = Collection
@@ -107,6 +115,27 @@ def load(configuration, logger):
    return CollectionCopy
 def scandir(path, only_dirs=False, only_files=False):
    """Iterator for directory elements. (For compatibility with Python < 3.5)

    Yields the names (not the full paths) of the entries found in ``path``.

    ``only_dirs`` only return directories

    ``only_files`` only return files

    On Python >= 3.5 the listing comes from ``os.scandir``. The underlying
    iterator is closed explicitly when this generator finishes, is closed
    or is finalized, so a consumer that stops early (e.g. a membership
    test that finds its match) does not leave the directory handle open.

    """
    if sys.version_info >= (3, 5):
        entries = os.scandir(path)
        try:
            for entry in entries:
                if ((not only_files or entry.is_file()) and
                        (not only_dirs or entry.is_dir())):
                    yield entry.name
        finally:
            # The iterator only has a ``close`` method on Python >= 3.6;
            # look it up defensively so Python 3.5 keeps working.
            close = getattr(entries, "close", None)
            if close is not None:
                close()
    else:
        for name in os.listdir(path):
            p = os.path.join(path, name)
            if ((not only_files or os.path.isfile(p)) and
                    (not only_dirs or os.path.isdir(p))):
                yield name
 def get_etag(text):
    """Etag from collection or item.
@@ -183,9 +212,9 @@ def path_to_filesystem(root, *paths):
            safe_path = os.path.join(safe_path, part)
            # Check for conflicting files (e.g. case-insensitive file systems
            # or short names on Windows file systems)
-            if os.path.lexists(safe_path):
+            if (os.path.lexists(safe_path) and
-                if part not in os.listdir(safe_path_parent):
+                    part not in scandir(safe_path_parent)):
-                    raise CollidingPathError(part)
+                raise CollidingPathError(part)
    return safe_path
@@ -214,19 +243,57 @@ class ComponentNotFoundError(ValueError):
 class Item:
-    def __init__(self, collection, item, href, last_modified=None):
+    def __init__(self, collection, item=None, href=None, last_modified=None,
                 text=None, etag=None):
        """Initialize an item.
        ``collection`` the parent collection.
        ``href`` the href of the item.
        ``last_modified`` the HTTP-datetime of when the item was modified.
        ``text`` the text representation of the item (optional if ``item`` is
        set).
        ``item`` the vobject item (optional if ``text`` is set).
        ``etag`` the etag of the item (optional). See ``get_etag``.
        """
        if text is None and item is None:
            raise ValueError("at least one of 'text' or 'item' must be set")
        self.collection = collection
        self.item = item
        self.href = href
        self.last_modified = last_modified
        self._text = text
        self._item = item
        self._etag = etag
    def __getattr__(self, attr):
        return getattr(self.item, attr)
    def serialize(self):
        """Return the text representation of the item.

        If no text is stored yet, ``self.item`` is serialized once and the
        result is kept in ``_text`` for later calls.
        """
        text = self._text
        if text is None:
            text = self._text = self.item.serialize()
        return text
    @property
    def item(self):
        """The vobject item, parsed from ``_text`` on first access.

        Raises ``RuntimeError`` (chained to the original exception) if the
        text cannot be parsed.
        """
        if self._item is not None:
            return self._item
        try:
            self._item = vobject.readOne(self._text)
        except Exception as e:
            raise RuntimeError("Failed to parse item %r in %r" %
                               (self.href, self.collection.path)) from e
        return self._item
    @property
    def etag(self):
        """Encoded as quoted-string (see RFC 2616)."""
-        return get_etag(self.serialize())
+        if self._etag is None:
            self._etag = get_etag(self.serialize())
        return self._etag
 class BaseCollection:
@@ -331,21 +398,54 @@ class BaseCollection:
    def get_multi(self, hrefs):
        """Fetch several items at once; duplicated hrefs are ignored.

        DEPRECATED: use ``get_multi2`` instead

        Returns a generator that calls ``get`` once per distinct href.
        """
        unique_hrefs = set(hrefs)
        return (self.get(unique_href) for unique_href in unique_hrefs)
    def get_multi2(self, hrefs):
        """Fetch multiple items.

        This default implementation simply calls ``get`` for every href;
        storages may override it for better performance. Implementations
        are not required to keep the order of ``hrefs`` and may ignore
        duplicated hrefs.

        Returns tuples ``(href, item)`` where ``item`` is None if the item
        doesn't exist.
        """
        def pair(name):
            return name, self.get(name)

        return (pair(name) for name in hrefs)
    def get_all(self):
        """Fetch all items.
        Functionally similar to ``get``, but might bring performance benefits
        on some storages when used cleverly.
        """
-        for href in set(hrefs):
+        return map(self.get, self.list())
-            yield self.get(href)
+
    def get_all_filtered(self, filters):
        """Fetch all items, optionally narrowed down by ``filters``.

        Storages can override this to speed up reports considerably,
        depending on the filters and on the implementation.

        Returns tuples in the form ``(item, filters_matched)``, where
        ``filters_matched`` is a bool telling whether ``filters`` are
        already fully matched for that item.

        This default implementation ignores ``filters``: it returns every
        item from ``get_all`` with ``filters_matched`` set to False.
        """
        every_item = self.get_all()
        return ((entry, False) for entry in every_item)
    def pre_filtered_list(self, filters):
        """List collection items with optional pre filtering.
-        This could largely improve performance of reports depending on
+        DEPRECATED: use ``get_all_filtered`` instead
-        the filters and this implementation.
+
        This returns all event by default
        """
-        return [self.get(href) for href in self.list()]
+        return self.get_all()
    def has(self, href):
        """Check if an item exists by its href.
@@ -414,6 +514,8 @@ class Collection(BaseCollection):
        split_path = self.path.split("/")
        self.owner = split_path[0] if len(split_path) > 1 else None
        self.is_principal = principal
        self._meta = None
        self._etag = None
    @classmethod
    def _get_collection_root_folder(cls):
@@ -533,17 +635,15 @@ class Collection(BaseCollection):
        for item in collection.list():
            yield collection.get(item)
-        for href in os.listdir(filesystem_path):
+        for href in scandir(filesystem_path, only_dirs=True):
            if not is_safe_filesystem_path_component(href):
                if not href.startswith(".Radicale"):
                    cls.logger.debug("Skipping collection %r in %r", href,
                                     path)
                continue
-            child_filesystem_path = path_to_filesystem(filesystem_path, href)
+            child_path = posixpath.join(path, href)
-            if os.path.isdir(child_filesystem_path):
+            child_principal = len(attributes) == 0
-                child_path = posixpath.join(path, href)
+            yield cls(child_path, child_principal)
                child_principal = len(attributes) == 0
                yield cls(child_path, child_principal)
    @classmethod
    def create_collection(cls, href, collection=None, props=None):
@@ -724,7 +824,7 @@ class Collection(BaseCollection):
        history_folder = os.path.join(self._filesystem_path,
                                      ".Radicale.cache", "history")
        try:
-            for href in os.listdir(history_folder):
+            for href in scandir(history_folder):
                if not is_safe_filesystem_path_component(href):
                    continue
                if os.path.isfile(os.path.join(self._filesystem_path, href)):
@@ -766,7 +866,7 @@ class Collection(BaseCollection):
        token_name_hash = md5()
        # Find the history of all existing and deleted items
        for href, item in chain(
-                ((item.href, item) for item in self.pre_filtered_list(())),
+                ((item.href, item) for item in self.get_all()),
                ((href, None) for href in self._get_deleted_history_hrefs())):
            history_etag = self._update_history_etag(href, item)
            state[href] = history_etag
@@ -835,43 +935,135 @@ class Collection(BaseCollection):
        return token, changes
    def list(self):
-        for href in os.listdir(self._filesystem_path):
+        for href in scandir(self._filesystem_path, only_files=True):
            if not is_safe_filesystem_path_component(href):
                if not href.startswith(".Radicale"):
                    self.logger.debug(
                        "Skipping item %r in %r", href, self.path)
                continue
-            path = os.path.join(self._filesystem_path, href)
+            yield href
            if os.path.isfile(path):
                yield href
-    def get(self, href):
+    _item_cache_cleaned = False
-        if not href:
+
-            return None
+    def get(self, href, verify_href=True):
-        if not is_safe_filesystem_path_component(href):
+        item, metadata = self._get_with_metadata(href, verify_href=verify_href)
-            self.logger.debug("Can't translate name %r safely to filesystem "
+        return item
-                              "in %r", href, self.path)
+
-            return None
+    def _get_with_metadata(self, href, verify_href=True):
-        path = path_to_filesystem(self._filesystem_path, href)
+        # Like ``get`` but additionally returns the following metadata:
-        if not os.path.isfile(path):
+        # tag, start, end: see ``xmlutils.find_tag_and_time_range``
-            return None
+        if verify_href:
-        with open(path, encoding=self.encoding, newline="") as f:
+            try:
-            text = f.read()
+                if not is_safe_filesystem_path_component(href):
                    raise UnsafePathError(href)
                path = path_to_filesystem(self._filesystem_path, href)
            except ValueError as e:
                self.logger.debug(
                    "Can't translate name %r safely to filesystem in %r: %s",
                    href, self.path, e, exc_info=True)
                return None, None
        else:
            path = os.path.join(self._filesystem_path, href)
        try:
            with open(path, "rb") as f:
                btext = f.read()
        except (FileNotFoundError, IsADirectoryError):
            return None, None
        # The hash of the component in the file system. This is used to check,
        # if the entry in the cache is still valid.
        input_hash = md5()
        input_hash.update(btext)
        input_hash = input_hash.hexdigest()
        cache_folder = os.path.join(self._filesystem_path, ".Radicale.cache",
                                    "item")
        try:
            with open(os.path.join(cache_folder, href), "rb") as f:
                cinput_hash, cetag, ctext, ctag, cstart, cend = pickle.load(f)
        except (FileNotFoundError, pickle.UnpicklingError, ValueError) as e:
            if isinstance(e, (pickle.UnpicklingError, ValueError)):
                self.logger.warning(
                    "Failed to load item cache entry %r in %r: %s",
                    href, self.path, e, exc_info=True)
            cinput_hash = cetag = ctext = ctag = cstart = cend = None
        vobject_item = None
        if input_hash != cinput_hash:
            vobject_item = Item(self, href=href,
                                text=btext.decode(self.encoding)).item
            # Serialize the object again, to normalize the text representation.
            # The storage may have been edited externally.
            ctext = vobject_item.serialize()
            cetag = get_etag(ctext)
            try:
                ctag, cstart, cend = xmlutils.find_tag_and_time_range(
                    vobject_item)
            except Exception as e:
                raise RuntimeError("Failed to find tag and time range of item "
                                   "%r from %r: %s" % (href, self.path,
                                                       e)) from e
            self._makedirs_synced(cache_folder)
            try:
                # Race: Other processes might have created and locked the
                # file.
                with self._atomic_write(os.path.join(cache_folder, href),
                                        "wb") as f:
                    pickle.dump((input_hash, cetag, ctext,
                                 ctag, cstart, cend), f)
            except PermissionError:
                pass
            # Clean cache entries (max once per request)
            # This happens once after new uploads, or if the data in the
            # file system was edited externally.
            if not self._item_cache_cleaned:
                self._item_cache_cleaned = True
                self._clean_cache(cache_folder, (
                    href for href in scandir(cache_folder) if not
                    os.path.isfile(os.path.join(self._filesystem_path, href))))
        last_modified = time.strftime(
            "%a, %d %b %Y %H:%M:%S GMT",
            time.gmtime(os.path.getmtime(path)))
-        try:
+        return Item(self, href=href, last_modified=last_modified, etag=cetag,
-            item = vobject.readOne(text)
+                    text=ctext, item=vobject_item), (ctag, cstart, cend)
-        except Exception as e:
+
-            raise RuntimeError("Failed to parse item %r in %r" %
+    def get_multi2(self, hrefs):
-                               (href, self.path)) from e
+        # It's faster to check for file name collisions here, because
-        return Item(self, item, href, last_modified)
+        # we only need to call os.listdir once.
        files = None
        for href in hrefs:
            if files is None:
                # List dir after hrefs returned one item, the iterator may be
                # empty and the for-loop is never executed.
                files = os.listdir(self._filesystem_path)
            path = os.path.join(self._filesystem_path, href)
            if (not is_safe_filesystem_path_component(href) or
                    href not in files and os.path.lexists(path)):
                self.logger.debug(
                    "Can't translate name safely to filesystem: %r", href)
                yield (href, None)
            else:
                yield (href, self.get(href, verify_href=False))
    def get_all(self):
        """Return a generator over all items listed by ``self.list()``."""
        # The hrefs come straight from the directory listing, so the
        # collision check done by ``get`` can be skipped.
        hrefs = self.list()
        return (self.get(name, verify_href=False) for name in hrefs)
    def get_all_filtered(self, filters):
        """Yield ``(item, filters_matched)`` for items passing the prefilter.

        ``filters`` is reduced by ``xmlutils.simplify_prefilters`` to a tag,
        a time range (``start``, ``end``) and a flag ``simple``. Without a
        tag every item from ``get_all`` is yielded together with ``simple``.
        Otherwise only items whose stored tag equals the filter tag and
        whose stored time range overlaps the filter range are yielded.
        """
        tag, start, end, simple = xmlutils.simplify_prefilters(filters)
        if tag:
            for href in self.list():
                item, (itag, istart, iend) = self._get_with_metadata(
                    href, verify_href=False)
                overlaps = tag == itag and istart < end and iend > start
                if overlaps:
                    yield item, simple and (start <= istart or iend <= end)
        else:
            # no filter
            for item in self.get_all():
                yield item, simple
    def upload(self, href, vobject_item):
        if not is_safe_filesystem_path_component(href):
            raise UnsafePathError(href)
        path = path_to_filesystem(self._filesystem_path, href)
-        item = Item(self, vobject_item, href)
+        item = Item(self, href=href, item=vobject_item)
        with self._atomic_write(path, newline="") as fd:
            fd.write(item.serialize())
        # Track the change
@@ -907,57 +1099,101 @@ class Collection(BaseCollection):
            self._clean_history_cache()
    def get_meta(self, key=None):
-        if os.path.exists(self._props_path):
+        # reuse cached value if the storage is read-only
-            with open(self._props_path, encoding=self.encoding) as f:
+        if self._writer or self._meta is None:
-                try:
+            try:
-                    meta = json.load(f)
+                with open(self._props_path, encoding=self.encoding) as f:
-                except ValueError as e:
+                    self._meta = json.load(f)
-                    raise RuntimeError("Failed to load properties of collect"
+            except FileNotFoundError:
-                                       "ion %r: %s" % (self.path, e)) from e
+                self._meta = {}
-                return meta.get(key) if key else meta
+            except ValueError as e:
                raise RuntimeError("Failed to load properties of collect"
                                   "ion %r: %s" % (self.path, e)) from e
        return self._meta.get(key) if key else self._meta
    def set_meta(self, props):
-        if os.path.exists(self._props_path):
+        new_props = self.get_meta()
-            with open(self._props_path, encoding=self.encoding) as f:
+        new_props.update(props)
-                old_props = json.load(f)
+        for key in tuple(new_props.keys()):
-                old_props.update(props)
+            if not new_props[key]:
-                props = old_props
+                del new_props[key]
-        props = {key: value for key, value in props.items() if value}
+        with self._atomic_write(self._props_path, "w") as f:
-        with self._atomic_write(self._props_path, "w+") as f:
+            json.dump(new_props, f)
            json.dump(props, f)
    @property
    def last_modified(self):
-        relevant_files = [self._filesystem_path] + [
+        relevant_files = chain(
-            path_to_filesystem(self._filesystem_path, href)
+            (self._filesystem_path,),
-            for href in self.list()]
+            (self._props_path,) if os.path.exists(self._props_path) else (),
-        if os.path.exists(self._props_path):
+            (os.path.join(self._filesystem_path, h) for h in self.list()))
            relevant_files.append(self._props_path)
        last = max(map(os.path.getmtime, relevant_files))
        return time.strftime("%a, %d %b %Y %H:%M:%S GMT", time.gmtime(last))
    def serialize(self):
-        items = []
+        # serialize collection
        time_begin = datetime.datetime.now()
        for href in self.list():
            items.append(self.get(href).item)
        time_end = datetime.datetime.now()
        self.logger.info(
            "Read %d items in %.3f seconds from %r", len(items),
            (time_end - time_begin).total_seconds(), self.path)
        if self.get_meta("tag") == "VCALENDAR":
-            collection = vobject.iCalendar()
+            in_vcalendar = False
-            for item in items:
+            vtimezones = ""
-                for content in ("vevent", "vtodo", "vjournal"):
+            included_tzids = set()
-                    if content in item.contents:
+            vtimezone = []
-                        for item_part in getattr(item, "%s_list" % content):
+            tzid = None
-                            collection.add(item_part)
+            components = ""
-                        break
+            # Concatenate all child elements of VCALENDAR from all items
-            return collection.serialize()
+            # together, while preventing duplicated VTIMEZONE entries.
            # VTIMEZONEs are only distinguished by their TZID, if different
            # timezones share the same TZID this produces erroneous output.
            # VObject fails at this too.
            for item in self.get_all():
                depth = 0
                for line in item.serialize().split("\r\n"):
                    if line.startswith("BEGIN:"):
                        depth += 1
                    if depth == 1 and line == "BEGIN:VCALENDAR":
                        in_vcalendar = True
                    elif in_vcalendar:
                        if depth == 1 and line.startswith("END:"):
                            in_vcalendar = False
                        if depth == 2 and line == "BEGIN:VTIMEZONE":
                            vtimezone.append(line)
                        elif vtimezone:
                            vtimezone.append(line)
                            if depth == 2 and line.startswith("TZID:"):
                                tzid = line[len("TZID:"):]
                            elif depth == 2 and line.startswith("END:"):
                                if tzid is None or tzid not in included_tzids:
                                    if vtimezones:
                                        vtimezones += "\r\n"
                                    vtimezones += "\r\n".join(vtimezone)
                                    included_tzids.add(tzid)
                                vtimezone.clear()
                                tzid = None
                        elif depth >= 2:
                            if components:
                                components += "\r\n"
                            components += line
                    if line.startswith("END:"):
                        depth -= 1
            return "\r\n".join(filter(bool, (
                "BEGIN:VCALENDAR",
                "VERSION:2.0",
                "PRODID:-//PYVOBJECT//NONSGML Version 1//EN",
                vtimezones,
                components,
                "END:VCALENDAR")))
        elif self.get_meta("tag") == "VADDRESSBOOK":
-            return "".join([item.serialize() for item in items])
+            return "".join((item.serialize() for item in self.get_all()))
        return ""
    @property
    def etag(self):
        """Etag of the collection as a quoted MD5 hex digest.

        The digest is built from ``href + "/" + etag`` of every item
        returned by ``get_all``. A previously computed value is reused
        unless ``_writer`` is truthy.
        """
        # reuse cached value if the storage is read-only
        if not self._writer and self._etag is not None:
            return self._etag
        digest = md5()
        for item in self.get_all():
            digest.update((item.href + "/" + item.etag).encode("utf-8"))
        self._etag = '"%s"' % digest.hexdigest()
        return self._etag
    _lock = threading.Lock()
    _waiters = []
    _lock_file = None
--- a/radicale/xmlutils.py
+++ b/radicale/xmlutils.py
@@ -26,12 +26,14 @@ in them for XML requests (all but PUT).
 """
 import copy
 import math
 import posixpath
 import re
 import xml.etree.ElementTree as ET
 from collections import OrderedDict
-from datetime import datetime, timedelta, timezone
+from datetime import date, datetime, timedelta, timezone
 from http import client
 from itertools import chain
 from urllib.parse import quote, unquote, urlparse
 from . import storage
@@ -56,6 +58,13 @@ for short, url in NAMESPACES.items():
 CLARK_TAG_REGEX = re.compile(r"{(?P<namespace>[^}]*)}(?P<tag>.*)", re.VERBOSE)
 HUMAN_REGEX = re.compile(r"(?P<namespace>[^:{}]*)(?P<tag>.*)", re.VERBOSE)
 DAY = timedelta(days=1)
 SECOND = timedelta(seconds=1)
 DATETIME_MIN = datetime.min.replace(tzinfo=timezone.utc)
 DATETIME_MAX = datetime.max.replace(tzinfo=timezone.utc)
 TIMESTAMP_MIN = math.floor(DATETIME_MIN.timestamp())
 TIMESTAMP_MAX = math.ceil(DATETIME_MAX.timestamp())
 def pretty_xml(element, level=0):
    """Indent an ElementTree ``element`` and its children."""
@@ -210,11 +219,9 @@ def _prop_match(item, filter_):
 def _time_range_match(vobject_item, filter_, child_name):
-    """Check whether the ``item`` matches the time-range ``filter_``.
+    """Check whether the component/property ``child_name`` of
       ``vobject_item`` matches the time-range ``filter_``."""
    See rfc4791-9.9.
    """
    start = filter_.get("start")
    end = filter_.get("end")
    if not start and not end:
@@ -229,14 +236,53 @@ def _time_range_match(vobject_item, filter_, child_name):
        end = datetime.max
    start = start.replace(tzinfo=timezone.utc)
    end = end.replace(tzinfo=timezone.utc)
    child = getattr(vobject_item, child_name.lower())
    matched = False
    def range_fn(range_start, range_end):
        nonlocal matched
        if start < range_end and range_start < end:
            matched = True
            return True
        if end < range_start:
            return True
        return False
    def infinity_fn(start):
        return False
    _visit_time_ranges(vobject_item, child_name, range_fn, infinity_fn)
    return matched
 def _visit_time_ranges(vobject_item, child_name, range_fn, infinity_fn):
    """Visit all time ranges in the component/property ``child_name`` of
    ``vobject_item`` with visitors ``range_fn`` and ``infinity_fn``.
    ``range_fn`` gets called for every time_range with ``start`` and ``end``
    datetimes as arguments. If the function returns True, the operation is
    cancelled.
    ``infinity_fn`` gets called when an infinite recurrence rule is detected
    with ``start`` datetime as argument. If the function returns True, the
    operation is cancelled.
    See rfc4791-9.9.
    """
    child = getattr(vobject_item, child_name.lower())
    # Comments give the lines in the tables of the specification
    if child_name == "VEVENT":
        # TODO: check if there's a timezone
        dtstart = child.dtstart.value
        if child.rruleset:
            if (";UNTIL=" not in child.rrule.value and
                    ";COUNT=" not in child.rrule.value):
                for dtstart in child.getrruleset(addRDate=True):
                    if infinity_fn(_date_to_datetime(dtstart)):
                        return
                    break
            dtstarts = child.getrruleset(addRDate=True)
        else:
            dtstarts = (dtstart,)
@@ -255,31 +301,30 @@ def _time_range_match(vobject_item, filter_, child_name):
            dtstart_is_datetime = isinstance(dtstart, datetime)
            dtstart = _date_to_datetime(dtstart)
            if dtstart > end:
                break
            if dtend is not None:
                # Line 1
                dtend = dtstart + timedelta(seconds=original_duration)
-                if start < dtend and end > dtstart:
+                if range_fn(dtstart, dtend):
-                    return True
+                    return
            elif duration is not None:
                if original_duration is None:
                    original_duration = duration.seconds
                if duration.seconds > 0:
                    # Line 2
-                    if start < dtstart + duration and end > dtstart:
+                    if range_fn(dtstart, dtstart + duration):
-                        return True
+                        return
-                elif start <= dtstart and end > dtstart:
+                else:
                    # Line 3
-                    return True
+                    if range_fn(dtstart, dtstart + SECOND):
                        return
            elif dtstart_is_datetime:
                # Line 4
-                if start <= dtstart and end > dtstart:
+                if range_fn(dtstart, dtstart + SECOND):
-                    return True
+                    return
-            elif start < dtstart + timedelta(days=1) and end > dtstart:
+            else:
                # Line 5
-                return True
+                if range_fn(dtstart, dtstart + DAY):
                    return
    elif child_name == "VTODO":
        dtstart = getattr(child, "dtstart", None)
@@ -305,6 +350,12 @@ def _time_range_match(vobject_item, filter_, child_name):
            created = _date_to_datetime(created.value)
        if child.rruleset:
            if (";UNTIL=" not in child.rrule.value and
                    ";COUNT=" not in child.rrule.value):
                for reference_date in child.getrruleset(addRDate=True):
                    if infinity_fn(_date_to_datetime(reference_date)):
                        return
                    break
            reference_dates = child.getrruleset(addRDate=True)
        else:
            if dtstart is not None:
@@ -317,47 +368,56 @@ def _time_range_match(vobject_item, filter_, child_name):
                reference_dates = (created,)
            else:
                # Line 8
-                return True
+                if range_fn(DATETIME_MIN, DATETIME_MAX):
                    return
                reference_dates = ()
        for reference_date in reference_dates:
            reference_date = _date_to_datetime(reference_date)
            if reference_date > end:
                break
            if dtstart is not None and duration is not None:
                # Line 1
-                if start <= reference_date + duration and (
+                if range_fn(reference_date,
-                        end > reference_date or
+                            reference_date + duration + SECOND):
-                        end >= reference_date + duration):
+                    return
-                    return True
+                if range_fn(reference_date + duration - SECOND,
                            reference_date + duration + SECOND):
                    return
            elif dtstart is not None and due is not None:
                # Line 2
                due = reference_date + timedelta(seconds=original_duration)
-                if (start < due or start <= reference_date) and (
+                if (range_fn(reference_date, due) or
-                        end > reference_date or end >= due):
+                        range_fn(reference_date, reference_date + SECOND) or
-                    return True
+                        range_fn(due - SECOND, due) or
                        range_fn(due - SECOND, reference_date + SECOND)):
                    return
            elif dtstart is not None:
-                if start <= reference_date and end > reference_date:
+                if range_fn(reference_date, reference_date + SECOND):
-                    return True
+                    return
            elif due is not None:
                # Line 4
-                if start < reference_date and end >= reference_date:
+                if range_fn(reference_date - SECOND, reference_date):
-                    return True
+                    return
            elif completed is not None and created is not None:
                # Line 5
                completed = reference_date + timedelta(
                    seconds=original_duration)
-                if (start <= reference_date or start <= completed) and (
+                if (range_fn(reference_date - SECOND,
-                        end >= reference_date or end >= completed):
+                             reference_date + SECOND) or
-                    return True
+                        range_fn(completed - SECOND, completed + SECOND) or
                        range_fn(reference_date - SECOND,
                                 reference_date + SECOND) or
                        range_fn(completed - SECOND, completed + SECOND)):
                    return
            elif completed is not None:
                # Line 6
-                if start <= reference_date and end >= reference_date:
+                if range_fn(reference_date - SECOND,
-                    return True
+                            reference_date + SECOND):
                            return
            elif created is not None:
                # Line 7
-                if end > reference_date:
+                if range_fn(reference_date, DATETIME_MAX):
-                    return True
+                    return
    elif child_name == "VJOURNAL":
        dtstart = getattr(child, "dtstart", None)
@@ -365,6 +425,12 @@ def _time_range_match(vobject_item, filter_, child_name):
        if dtstart is not None:
            dtstart = dtstart.value
            if child.rruleset:
                if (";UNTIL=" not in child.rrule.value and
                        ";COUNT=" not in child.rrule.value):
                    for dtstart in child.getrruleset(addRDate=True):
                        if infinity_fn(_date_to_datetime(dtstart)):
                            return
                        break
                dtstarts = child.getrruleset(addRDate=True)
            else:
                dtstarts = (dtstart,)
@@ -373,18 +439,21 @@ def _time_range_match(vobject_item, filter_, child_name):
                dtstart_is_datetime = isinstance(dtstart, datetime)
                dtstart = _date_to_datetime(dtstart)
                if dtstart > end:
                    break
                if dtstart_is_datetime:
                    # Line 1
-                    if start <= dtstart and end > dtstart:
+                    if range_fn(dtstart, dtstart + SECOND):
-                        return True
+                        return
-                elif start < dtstart + timedelta(days=1) and end > dtstart:
+                else:
                    # Line 2
-                    return True
+                    if range_fn(dtstart, dtstart + DAY):
                        return
-    return False
+    elif isinstance(child, date):
        if range_fn(child, child + DAY):
            return
    elif isinstance(child, datetime):
        if range_fn(child, child + SECOND):
            return
 def _text_match(vobject_item, filter_, child_name, attrib_name=None):
@@ -429,6 +498,99 @@ def _param_filter_match(vobject_item, filter_, parent_name):
        return condition
 def simplify_prefilters(filters):
    """Reduce ``filters`` to a single component tag and a time window.

    Returns a tuple (``tag``, ``start``, ``end``, ``simple``).

    ``tag`` is the name of the first nested comp-filter for ``VTODO``,
    ``VEVENT`` or ``VJOURNAL`` (without ``is-not-defined``) found inside a
    ``VCALENDAR`` comp-filter, or None (match all) if there is none.

    ``start`` and ``end`` are POSIX timestamps (as int) taken from the first
    time-range of that comp-filter; a missing bound or a missing time-range
    is replaced by ``TIMESTAMP_MIN`` / ``TIMESTAMP_MAX``.

    ``simple`` is a bool that indicates that ``filters`` and the simplified
    condition are identical. It is cleared as soon as an element is skipped
    or any level holds more than one element.

    """
    def utc_seconds(text):
        # Bounds are written as UTC date-times, e.g. "20170101T000000Z".
        moment = datetime.strptime(text, "%Y%m%dT%H%M%SZ")
        return moment.replace(tzinfo=timezone.utc).timestamp()

    flat_filters = tuple(chain.from_iterable(filters))
    simple = len(flat_filters) <= 1
    for outer in flat_filters:
        is_calendar_filter = (
            outer.tag == _tag("C", "comp-filter") and
            outer.get("name") == "VCALENDAR")
        if not is_calendar_filter:
            simple = False
            continue
        if len(outer) > 1:
            simple = False
        for inner in outer:
            if inner.tag != _tag("C", "comp-filter"):
                simple = False
                continue
            tag = inner.get("name")
            usable = (
                tag in ("VTODO", "VEVENT", "VJOURNAL") and
                inner.find(_tag("C", "is-not-defined")) is None)
            if not usable:
                simple = False
                continue
            if len(inner) > 1:
                simple = False
            for leaf in inner:
                if leaf.tag != _tag("C", "time-range"):
                    simple = False
                    continue
                raw_start = leaf.get("start")
                raw_end = leaf.get("end")
                # Round outwards so the window never shrinks.
                start = (math.floor(utc_seconds(raw_start))
                         if raw_start else TIMESTAMP_MIN)
                end = (math.ceil(utc_seconds(raw_end))
                       if raw_end else TIMESTAMP_MAX)
                return tag, start, end, simple
            # Usable comp-filter without a time-range: unbounded window.
            return tag, TIMESTAMP_MIN, TIMESTAMP_MAX, simple
    return None, TIMESTAMP_MIN, TIMESTAMP_MAX, simple
 def find_tag_and_time_range(vobject_item):
    """Determine the tag and the enclosing time range of ``vobject_item``.

    Returns a tuple (``tag``, ``start``, ``end``) where ``tag`` is the name
    of the first ``VTODO``, ``VEVENT`` or ``VJOURNAL`` component of a
    ``VCALENDAR`` item and ``start`` and ``end`` are POSIX timestamps
    (as int). If no such component exists ``tag`` is None and the range
    spans ``DATETIME_MIN`` to ``DATETIME_MAX``.

    This is intended to be used for matching against simplified prefilters.

    """
    tag = ""
    if vobject_item.name == "VCALENDAR":
        tag = next(
            (component.name for component in vobject_item.components()
             if component.name in ("VTODO", "VEVENT", "VJOURNAL")),
            "")
    if not tag:
        return (None, math.floor(DATETIME_MIN.timestamp()),
                math.ceil(DATETIME_MAX.timestamp()))

    # bounds[0] is the earliest start seen, bounds[1] the latest end seen.
    bounds = [None, None]

    def widen(range_start, range_end):
        if bounds[0] is None or range_start < bounds[0]:
            bounds[0] = range_start
        if bounds[1] is None or bounds[1] < range_end:
            bounds[1] = range_end
        return False

    def open_ended(range_start):
        if bounds[0] is None or range_start < bounds[0]:
            bounds[0] = range_start
        bounds[1] = DATETIME_MAX
        return True

    _visit_time_ranges(vobject_item, tag, widen, open_ended)
    start = DATETIME_MIN if bounds[0] is None else bounds[0]
    end = DATETIME_MAX if bounds[1] is None else bounds[1]
    return tag, math.floor(start.timestamp()), math.ceil(end.timestamp())
 def name_from_path(path, collection):
    """Return Radicale item name from ``path``."""
    path = path.strip("/") + "/"
@@ -891,70 +1053,87 @@ def report(base_prefix, path, xml_request, collection):
        root.findall("./%s" % _tag("C", "filter")) +
        root.findall("./%s" % _tag("CR", "filter")))
-    for hreference in hreferences:
+    def retrieve_items(collection, hreferences, multistatus):
-        try:
+        """Retrieves all items that are referenced in ``hreferences`` from
-            name = name_from_path(hreference, collection)
+           ``collection`` and adds 404 responses for missing and invalid items
-        except ValueError as e:
+           to ``multistatus``."""
-            collection.logger.warning("Skipping invalid path %r in REPORT "
+        collection_requested = False
-                                      "request on %r: %s", hreference, path, e)
+
-            response = _item_response(base_prefix, hreference,
+        def get_names():
-                                      found_item=False)
+            """Extracts all names from references in ``hreferences`` and adds
-            multistatus.append(response)
+               404 responses for invalid references to ``multistatus``.
-            continue
+               If the whole collections is referenced ``collection_requested``
-        if name:
+               gets set to ``True``."""
-            # Reference is an item
+            nonlocal collection_requested
-            item = collection.get(name)
+            for hreference in hreferences:
                try:
                    name = name_from_path(hreference, collection)
                except ValueError as e:
                    collection.logger.warning(
                        "Skipping invalid path %r in REPORT request on %r: %s",
                        hreference, path, e)
                    response = _item_response(base_prefix, hreference,
                                              found_item=False)
                    multistatus.append(response)
                    continue
                if name:
                    # Reference is an item
                    yield name
                else:
                    # Reference is a collection
                    collection_requested = True
        for name, item in collection.get_multi2(get_names()):
            if not item:
-                response = _item_response(base_prefix, hreference,
+                uri = "/" + posixpath.join(collection.path, name)
                response = _item_response(base_prefix, uri,
                                          found_item=False)
                multistatus.append(response)
-                continue
+            else:
-            items = [item]
+                yield item, False
-        else:
+        if collection_requested:
-            # Reference is a collection
+            yield from collection.get_all_filtered(filters)
            items = collection.pre_filtered_list(filters)
-        for item in items:
+    for item, filters_matched in retrieve_items(collection, hreferences,
-            if not item:
+                                                multistatus):
-                continue
+        if filters and not filters_matched:
-            if filters:
+            match = (
-                try:
+                _comp_match if collection.get_meta("tag") == "VCALENDAR"
-                    match = (_comp_match
+                else _prop_match)
-                             if collection.get_meta("tag") == "VCALENDAR"
+            try:
-                             else _prop_match)
+                if not all(match(item, filter_[0]) for filter_ in filters
-                    if not all(match(item, filter_[0]) for filter_ in filters
+                           if filter_):
-                               if filter_):
+                    continue
-                        continue
+            except Exception as e:
-                except Exception as e:
+                raise RuntimeError("Failed to filter item %r from %r: %s" %
-                    raise RuntimeError("Failed to filter item %r from %r: %s" %
+                                   (item.href, collection.path, e)) from e
                                       (collection.path, item.href, e)) from e
-            found_props = []
+        found_props = []
-            not_found_props = []
+        not_found_props = []
-            for tag in props:
+        for tag in props:
-                element = ET.Element(tag)
+            element = ET.Element(tag)
-                if tag == _tag("D", "getetag"):
+            if tag == _tag("D", "getetag"):
-                    element.text = item.etag
+                element.text = item.etag
-                    found_props.append(element)
+                found_props.append(element)
-                elif tag == _tag("D", "getcontenttype"):
+            elif tag == _tag("D", "getcontenttype"):
-                    name = item.name.lower()
+                name = item.name.lower()
-                    mimetype = (
+                mimetype = (
-                        "text/vcard" if name == "vcard" else "text/calendar")
+                    "text/vcard" if name == "vcard" else "text/calendar")
-                    element.text = "%s; component=%s" % (mimetype, name)
+                element.text = "%s; component=%s" % (mimetype, name)
-                    found_props.append(element)
+                found_props.append(element)
-                elif tag in (
+            elif tag in (
-                        _tag("C", "calendar-data"),
+                    _tag("C", "calendar-data"),
-                        _tag("CR", "address-data")):
+                    _tag("CR", "address-data")):
-                    element.text = item.serialize()
+                element.text = item.serialize()
-                    found_props.append(element)
+                found_props.append(element)
-                else:
+            else:
-                    not_found_props.append(element)
+                not_found_props.append(element)
-            uri = "/" + posixpath.join(collection.path, item.href)
+        uri = "/" + posixpath.join(collection.path, item.href)
-            multistatus.append(_item_response(
+        multistatus.append(_item_response(
-                base_prefix, uri, found_props=found_props,
+            base_prefix, uri, found_props=found_props,
-                not_found_props=not_found_props, found_item=True))
+            not_found_props=not_found_props, found_item=True))
    return client.MULTI_STATUS, multistatus