Make Radicale fast (#569)
* Change get_multi to also return missing items. get_multi is not used anywhere and this makes it easier to use.
* Use get_multi for report requests.
* Add get_all to BaseCollection. This can be used for optimization on multifilesystem.
* Use an iterator for files.
* Remove unnecessary checks. This never happens and would be an error.
* Don't raise an exception when calling get with a colliding name. This behavior is wrong; it should be handled as if the file doesn't exist.
* Use get_all and get_multi to skip unnecessary checks. Collision checks are slow on big collections.
* Use exceptions instead of existence checks. It's a bit faster.
* Use os.scandir instead of os.listdir. It's faster and doesn't load all files at once.
* Cache metadata when the storage is read-only. Metadata is queried a lot during a request, and it's quite slow to load and parse the file every time.
* Cache the etag when the storage is read-only. The etag is calculated twice for GET requests on collections.
* Add a helper method for cleaning caches.
* Use item etags to calculate the collection etag. It's very slow and unnecessary to parse all files with VObject and serialize them again.
* Cache serialized collections in the file system. Serialization is very slow for big collections; this caches the result in a file.
* Add a helper function for prefilters. The simplify_prefilters function converts XML filters to a simple tag and time range, which can be easily matched against the tag and time range that find_tag_and_time_range extracts from vobject items. (The idea is sketched right after the commit metadata below.)
* Add the ability to cache the etag and serialization of an item. Parsing items with vobject is very slow and not required for many requests; caching can be used to speed it up.
* Cache metadata and serialization from items in the file system. Store the serialized text and the tag and time range from vobject items in the cache. The metadata is used for prefilters.
* Remove the cache for the serialization of collections.
* Serialize calendars without vobject. Merge the calendar components manually; this is much faster and requires less memory, and caching of the result is not required anymore.
* Allow pre_filtered_list to indicate that filters match. The storage backend can indicate that it evaluated the filters completely.
* Skip filtering with vobject if prefiltering is sufficient. ``simplify_prefilters`` indicates whether the simplified condition is identical to ``filters``. This is used in the multifilesystem backend to detect if prefiltering is sufficient.
* Make constants global.
* Use generator expressions.
* Only extract elements from inside VCALENDAR. This is unnecessary at the moment; the text representation should never contain anything but VCALENDAR.
* Improve comments.
* Restore backward compatibility.
* Small improvements for the fast backend.
This commit is contained in: parent 78a62aee86, commit 9ceae0a751
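The core trick behind the prefilter changes (see the simplify_prefilters bullet above) is reducing an XML filter to a (tag, start, end) triple and testing each item with a plain interval-overlap check, so most items never have to be parsed. A minimal sketch of that predicate, with illustrative names rather than Radicale's actual API:

    # An item whose component ``itag`` covers [istart, iend) matches a filter
    # for ``tag`` covering [start, end) iff the tags agree and the intervals
    # overlap.
    def prefilter_match(tag, start, end, itag, istart, iend):
        return tag == itag and istart < end and iend > start

    # A VEVENT spanning [10, 20) overlaps a VEVENT filter for [15, 30).
    assert prefilter_match("VEVENT", 15, 30, "VEVENT", 10, 20)
    # A tag mismatch never matches, regardless of the time range.
    assert not prefilter_match("VTODO", 0, 100, "VEVENT", 10, 20)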
--- a/radicale/storage.py
+++ b/radicale/storage.py
@@ -27,7 +27,6 @@ entry.
 import binascii
 import contextlib
-import datetime
 import errno
 import json
 import os
@@ -36,6 +35,7 @@ import posixpath
 import shlex
 import stat
 import subprocess
+import sys
 import threading
 import time
 from contextlib import contextmanager
@@ -47,6 +47,10 @@ from tempfile import NamedTemporaryFile, TemporaryDirectory
 
 import vobject
 
+if sys.version_info >= (3, 5):
+    # HACK: Avoid import cycle for Python < 3.5
+    from . import xmlutils
+
 if os.name == "nt":
     import ctypes
     import ctypes.wintypes
@@ -89,6 +93,10 @@ elif os.name == "posix":
 
 def load(configuration, logger):
     """Load the storage manager chosen in configuration."""
+    if sys.version_info < (3, 5):
+        # HACK: Avoid import cycle for Python < 3.5
+        global xmlutils
+        from . import xmlutils
     storage_type = configuration.get("storage", "type")
     if storage_type == "multifilesystem":
         collection_class = Collection
@@ -107,6 +115,27 @@ def load(configuration, logger):
     return CollectionCopy
 
 
+def scandir(path, only_dirs=False, only_files=False):
+    """Iterator for directory elements. (For compatibility with Python < 3.5)
+
+    ``only_dirs`` only return directories
+
+    ``only_files`` only return files
+
+    """
+    if sys.version_info >= (3, 5):
+        for entry in os.scandir(path):
+            if ((not only_files or entry.is_file()) and
+                    (not only_dirs or entry.is_dir())):
+                yield entry.name
+    else:
+        for name in os.listdir(path):
+            p = os.path.join(path, name)
+            if ((not only_files or os.path.isfile(p)) and
+                    (not only_dirs or os.path.isdir(p))):
+                yield name
+
+
 def get_etag(text):
     """Etag from collection or item.
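As a usage sketch (the path is a placeholder), the wrapper yields names lazily, so on Python >= 3.5 a huge collection is never materialized as one list:

    # Iterate the plain files of a directory without building a full listing.
    for name in scandir("/path/to/collection", only_files=True):
        print(name)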
@@ -183,8 +212,8 @@ def path_to_filesystem(root, *paths):
         safe_path = os.path.join(safe_path, part)
         # Check for conflicting files (e.g. case-insensitive file systems
         # or short names on Windows file systems)
-        if os.path.lexists(safe_path):
-            if part not in os.listdir(safe_path_parent):
+        if (os.path.lexists(safe_path) and
+                part not in scandir(safe_path_parent)):
             raise CollidingPathError(part)
     return safe_path
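The reasoning behind this check, as a standalone sketch (a hypothetical helper, not part of the module): on case-insensitive file systems or with Windows short names, a path may appear to exist even though no directory entry carries exactly that name, and comparing against the parent's real entries detects the collision.

    import os

    def has_collision(parent, part):
        # lexists() may be true although no entry is literally named ``part``
        # (case-insensitive lookup); the directory listing gives the truth.
        path = os.path.join(parent, part)
        return os.path.lexists(path) and part not in os.listdir(parent)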
@@ -214,19 +243,57 @@ class ComponentNotFoundError(ValueError):
 
 
 class Item:
-    def __init__(self, collection, item, href, last_modified=None):
+    def __init__(self, collection, item=None, href=None, last_modified=None,
+                 text=None, etag=None):
+        """Initialize an item.
+
+        ``collection`` the parent collection.
+
+        ``href`` the href of the item.
+
+        ``last_modified`` the HTTP-datetime of when the item was modified.
+
+        ``text`` the text representation of the item (optional if ``item`` is
+        set).
+
+        ``item`` the vobject item (optional if ``text`` is set).
+
+        ``etag`` the etag of the item (optional). See ``get_etag``.
+
+        """
+        if text is None and item is None:
+            raise ValueError("at least one of 'text' or 'item' must be set")
         self.collection = collection
-        self.item = item
         self.href = href
         self.last_modified = last_modified
+        self._text = text
+        self._item = item
+        self._etag = etag
 
     def __getattr__(self, attr):
         return getattr(self.item, attr)
 
+    def serialize(self):
+        if self._text is None:
+            self._text = self.item.serialize()
+        return self._text
+
+    @property
+    def item(self):
+        if self._item is None:
+            try:
+                self._item = vobject.readOne(self._text)
+            except Exception as e:
+                raise RuntimeError("Failed to parse item %r in %r" %
+                                   (self.href, self.collection.path)) from e
+        return self._item
+
     @property
     def etag(self):
         """Encoded as quoted-string (see RFC 2616)."""
-        return get_etag(self.serialize())
+        if self._etag is None:
+            self._etag = get_etag(self.serialize())
+        return self._etag
 
 
 class BaseCollection:
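The effect of the rewritten Item class: an instance built from raw text touches vobject only when a request actually needs the parsed component, and the etag is computed at most once. A usage sketch, where ``collection`` and ``ics_text`` are hypothetical placeholders:

    item = Item(collection, href="event.ics", text=ics_text)
    etag = item.etag       # serializes from the cached text and hashes once
    etag2 = item.etag      # returns the cached etag, no recomputation
    component = item.item  # only now is vobject.readOne() invoked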
@@ -331,21 +398,54 @@ class BaseCollection:
     def get_multi(self, hrefs):
-        """Fetch multiple items. Duplicate hrefs must be ignored."""
-        for href in set(hrefs):
-            yield self.get(href)
+        """Fetch multiple items. Duplicate hrefs must be ignored.
+
+        DEPRECATED: use ``get_multi2`` instead
+
+        """
+        return (self.get(href) for href in set(hrefs))
+
+    def get_multi2(self, hrefs):
+        """Fetch multiple items.
+
+        Functionally similar to ``get``, but might bring performance benefits
+        on some storages when used cleverly. It's not required to return the
+        requested items in the correct order. Duplicated hrefs can be ignored.
+
+        Returns tuples with the href and the item or None if the item doesn't
+        exist.
+
+        """
+        return ((href, self.get(href)) for href in hrefs)
+
+    def get_all(self):
+        """Fetch all items.
+
+        Functionally similar to ``get``, but might bring performance benefits
+        on some storages when used cleverly.
+
+        """
+        return map(self.get, self.list())
+
+    def get_all_filtered(self, filters):
+        """Fetch all items with optional filtering.
+
+        This can largely improve performance of reports depending on
+        the filters and this implementation.
+
+        Returns tuples in the form ``(item, filters_matched)``.
+        ``filters_matched`` is a bool that indicates if ``filters`` are fully
+        matched.
+
+        This returns all events by default
+        """
+        return ((item, False) for item in self.get_all())
 
     def pre_filtered_list(self, filters):
         """List collection items with optional pre filtering.
 
-        This could largely improve performance of reports depending on
-        the filters and this implementation.
-        This returns all event by default
+        DEPRECATED: use ``get_all_filtered`` instead
+
         """
-        return [self.get(href) for href in self.list()]
+        return self.get_all()
 
     def has(self, href):
         """Check if an item exists by its href.
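From a caller's point of view the new methods look like this (a sketch; ``collection`` and ``filters`` are placeholders):

    # Missing items come back as (href, None) instead of forcing one
    # existence check per name.
    for href, item in collection.get_multi2(["a.ics", "missing.ics"]):
        if item is None:
            print("not found:", href)

    # The backend may pre-evaluate filters; the flag says whether filtering
    # is already complete for that item.
    for item, filters_matched in collection.get_all_filtered(filters):
        if not filters_matched:
            pass  # the caller still runs the full XML filter match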
@@ -414,6 +514,8 @@ class Collection(BaseCollection):
         split_path = self.path.split("/")
         self.owner = split_path[0] if len(split_path) > 1 else None
         self.is_principal = principal
+        self._meta = None
+        self._etag = None
 
     @classmethod
     def _get_collection_root_folder(cls):
@@ -533,14 +635,12 @@
             for item in collection.list():
                 yield collection.get(item)
 
-        for href in os.listdir(filesystem_path):
+        for href in scandir(filesystem_path, only_dirs=True):
             if not is_safe_filesystem_path_component(href):
                 if not href.startswith(".Radicale"):
                     cls.logger.debug("Skipping collection %r in %r", href,
                                      path)
                 continue
-            child_filesystem_path = path_to_filesystem(filesystem_path, href)
-            if os.path.isdir(child_filesystem_path):
-                child_path = posixpath.join(path, href)
-                child_principal = len(attributes) == 0
-                yield cls(child_path, child_principal)
+            child_path = posixpath.join(path, href)
+            child_principal = len(attributes) == 0
+            yield cls(child_path, child_principal)
@@ -724,7 +824,7 @@
         history_folder = os.path.join(self._filesystem_path,
                                       ".Radicale.cache", "history")
         try:
-            for href in os.listdir(history_folder):
+            for href in scandir(history_folder):
                 if not is_safe_filesystem_path_component(href):
                     continue
                 if os.path.isfile(os.path.join(self._filesystem_path, href)):
@@ -766,7 +866,7 @@
         token_name_hash = md5()
         # Find the history of all existing and deleted items
         for href, item in chain(
-                ((item.href, item) for item in self.pre_filtered_list(())),
+                ((item.href, item) for item in self.get_all()),
                 ((href, None) for href in self._get_deleted_history_hrefs())):
             history_etag = self._update_history_etag(href, item)
             state[href] = history_etag
@@ -835,43 +935,135 @@
         return token, changes
 
     def list(self):
-        for href in os.listdir(self._filesystem_path):
+        for href in scandir(self._filesystem_path, only_files=True):
             if not is_safe_filesystem_path_component(href):
                 if not href.startswith(".Radicale"):
                     self.logger.debug(
                         "Skipping item %r in %r", href, self.path)
                 continue
-            path = os.path.join(self._filesystem_path, href)
-            if os.path.isfile(path):
-                yield href
+            yield href
 
-    def get(self, href):
-        if not href:
-            return None
-        if not is_safe_filesystem_path_component(href):
-            self.logger.debug("Can't translate name %r safely to filesystem "
-                              "in %r", href, self.path)
-            return None
-        path = path_to_filesystem(self._filesystem_path, href)
-        if not os.path.isfile(path):
-            return None
-        with open(path, encoding=self.encoding, newline="") as f:
-            text = f.read()
+    _item_cache_cleaned = False
+
+    def get(self, href, verify_href=True):
+        item, metadata = self._get_with_metadata(href, verify_href=verify_href)
+        return item
+
+    def _get_with_metadata(self, href, verify_href=True):
+        # Like ``get`` but additionally returns the following metadata:
+        # tag, start, end: see ``xmlutils.find_tag_and_time_range``
+        if verify_href:
+            try:
+                if not is_safe_filesystem_path_component(href):
+                    raise UnsafePathError(href)
+                path = path_to_filesystem(self._filesystem_path, href)
+            except ValueError as e:
+                self.logger.debug(
+                    "Can't translate name %r safely to filesystem in %r: %s",
+                    href, self.path, e, exc_info=True)
+                return None, None
+        else:
+            path = os.path.join(self._filesystem_path, href)
+        try:
+            with open(path, "rb") as f:
+                btext = f.read()
+        except (FileNotFoundError, IsADirectoryError):
+            return None, None
+        # The hash of the component in the file system. This is used to check
+        # if the entry in the cache is still valid.
+        input_hash = md5()
+        input_hash.update(btext)
+        input_hash = input_hash.hexdigest()
+        cache_folder = os.path.join(self._filesystem_path, ".Radicale.cache",
+                                    "item")
+        try:
+            with open(os.path.join(cache_folder, href), "rb") as f:
+                cinput_hash, cetag, ctext, ctag, cstart, cend = pickle.load(f)
+        except (FileNotFoundError, pickle.UnpicklingError, ValueError) as e:
+            if isinstance(e, (pickle.UnpicklingError, ValueError)):
+                self.logger.warning(
+                    "Failed to load item cache entry %r in %r: %s",
+                    href, self.path, e, exc_info=True)
+            cinput_hash = cetag = ctext = ctag = cstart = cend = None
+        vobject_item = None
+        if input_hash != cinput_hash:
+            vobject_item = Item(self, href=href,
+                                text=btext.decode(self.encoding)).item
+            # Serialize the object again, to normalize the text representation.
+            # The storage may have been edited externally.
+            ctext = vobject_item.serialize()
+            cetag = get_etag(ctext)
+            try:
+                ctag, cstart, cend = xmlutils.find_tag_and_time_range(
+                    vobject_item)
+            except Exception as e:
+                raise RuntimeError("Failed to find tag and time range of item "
+                                   "%r from %r: %s" % (href, self.path,
+                                                       e)) from e
+            self._makedirs_synced(cache_folder)
+            try:
+                # Race: Other processes might have created and locked the
+                # file.
+                with self._atomic_write(os.path.join(cache_folder, href),
+                                        "wb") as f:
+                    pickle.dump((input_hash, cetag, ctext,
+                                 ctag, cstart, cend), f)
+            except PermissionError:
+                pass
+            # Clean cache entries (max once per request)
+            # This happens once after new uploads, or if the data in the
+            # file system was edited externally.
+            if not self._item_cache_cleaned:
+                self._item_cache_cleaned = True
+                self._clean_cache(cache_folder, (
+                    href for href in scandir(cache_folder) if not
+                    os.path.isfile(os.path.join(self._filesystem_path, href))))
         last_modified = time.strftime(
             "%a, %d %b %Y %H:%M:%S GMT",
             time.gmtime(os.path.getmtime(path)))
-        try:
-            item = vobject.readOne(text)
-        except Exception as e:
-            raise RuntimeError("Failed to parse item %r in %r" %
-                               (href, self.path)) from e
-        return Item(self, item, href, last_modified)
+        return Item(self, href=href, last_modified=last_modified, etag=cetag,
+                    text=ctext, item=vobject_item), (ctag, cstart, cend)
+
+    def get_multi2(self, hrefs):
+        # It's faster to check for file name collisions here, because
+        # we only need to call os.listdir once.
+        files = None
+        for href in hrefs:
+            if files is None:
+                # List the directory after ``hrefs`` returned its first item;
+                # the iterator may be empty and then the for loop is never
+                # executed.
+                files = os.listdir(self._filesystem_path)
+            path = os.path.join(self._filesystem_path, href)
+            if (not is_safe_filesystem_path_component(href) or
+                    href not in files and os.path.lexists(path)):
+                self.logger.debug(
+                    "Can't translate name safely to filesystem: %r", href)
+                yield (href, None)
+            else:
+                yield (href, self.get(href, verify_href=False))
+
+    def get_all(self):
+        # We don't need to check for collisions, because the file names
+        # are from os.listdir.
+        return (self.get(href, verify_href=False) for href in self.list())
+
+    def get_all_filtered(self, filters):
+        tag, start, end, simple = xmlutils.simplify_prefilters(filters)
+        if not tag:
+            # no filter
+            yield from ((item, simple) for item in self.get_all())
+            return
+        for item, (itag, istart, iend) in (
+                self._get_with_metadata(href, verify_href=False)
+                for href in self.list()):
+            if tag == itag and istart < end and iend > start:
+                yield item, simple and (start <= istart or iend <= end)
 
     def upload(self, href, vobject_item):
         if not is_safe_filesystem_path_component(href):
             raise UnsafePathError(href)
         path = path_to_filesystem(self._filesystem_path, href)
-        item = Item(self, vobject_item, href)
+        item = Item(self, href=href, item=vobject_item)
         with self._atomic_write(path, newline="") as fd:
             fd.write(item.serialize())
         # Track the change
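The cache entry used above is just a pickled tuple keyed by the MD5 of the raw file, so staleness detection is one hash comparison. The same pattern as a standalone sketch (paths and names are illustrative; Radicale additionally writes atomically and under its storage lock):

    import os
    import pickle
    from hashlib import md5

    def load_or_rebuild(cache_path, btext, rebuild):
        # Return the cached payload for the raw bytes ``btext``, rebuilding
        # it when stale. ``rebuild`` returns an iterable of values to cache.
        input_hash = md5(btext).hexdigest()
        try:
            with open(cache_path, "rb") as f:
                cached_hash, *payload = pickle.load(f)
            if cached_hash == input_hash:
                return payload
        except (FileNotFoundError, pickle.UnpicklingError, ValueError):
            pass  # missing or corrupt entry: fall through and rebuild
        payload = rebuild(btext)
        os.makedirs(os.path.dirname(cache_path) or ".", exist_ok=True)
        with open(cache_path, "wb") as f:
            pickle.dump((input_hash, *payload), f)
        return payload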
@@ -907,57 +1099,101 @@
             self._clean_history_cache()
 
     def get_meta(self, key=None):
-        if os.path.exists(self._props_path):
-            with open(self._props_path, encoding=self.encoding) as f:
-                meta = json.load(f)
-                return meta.get(key) if key else meta
+        # reuse cached value if the storage is read-only
+        if self._writer or self._meta is None:
+            try:
+                with open(self._props_path, encoding=self.encoding) as f:
+                    self._meta = json.load(f)
+            except FileNotFoundError:
+                self._meta = {}
+            except ValueError as e:
+                raise RuntimeError("Failed to load properties of collect"
+                                   "ion %r: %s" % (self.path, e)) from e
+        return self._meta.get(key) if key else self._meta
 
     def set_meta(self, props):
-        if os.path.exists(self._props_path):
-            with open(self._props_path, encoding=self.encoding) as f:
-                old_props = json.load(f)
-                old_props.update(props)
-                props = old_props
-        props = {key: value for key, value in props.items() if value}
-        with self._atomic_write(self._props_path, "w+") as f:
-            json.dump(props, f)
+        new_props = self.get_meta()
+        new_props.update(props)
+        for key in tuple(new_props.keys()):
+            if not new_props[key]:
+                del new_props[key]
+        with self._atomic_write(self._props_path, "w") as f:
+            json.dump(new_props, f)
 
     @property
     def last_modified(self):
-        relevant_files = [self._filesystem_path] + [
-            path_to_filesystem(self._filesystem_path, href)
-            for href in self.list()]
-        if os.path.exists(self._props_path):
-            relevant_files.append(self._props_path)
+        relevant_files = chain(
+            (self._filesystem_path,),
+            (self._props_path,) if os.path.exists(self._props_path) else (),
+            (os.path.join(self._filesystem_path, h) for h in self.list()))
         last = max(map(os.path.getmtime, relevant_files))
         return time.strftime("%a, %d %b %Y %H:%M:%S GMT", time.gmtime(last))
 
     def serialize(self):
-        items = []
-        time_begin = datetime.datetime.now()
-        for href in self.list():
-            items.append(self.get(href).item)
-        time_end = datetime.datetime.now()
-        self.logger.info(
-            "Read %d items in %.3f seconds from %r", len(items),
-            (time_end - time_begin).total_seconds(), self.path)
         # serialize collection
         if self.get_meta("tag") == "VCALENDAR":
-            collection = vobject.iCalendar()
-            for item in items:
-                for content in ("vevent", "vtodo", "vjournal"):
-                    if content in item.contents:
-                        for item_part in getattr(item, "%s_list" % content):
-                            collection.add(item_part)
-                        break
-            return collection.serialize()
+            in_vcalendar = False
+            vtimezones = ""
+            included_tzids = set()
+            vtimezone = []
+            tzid = None
+            components = ""
+            # Concatenate all child elements of VCALENDAR from all items
+            # together, while preventing duplicated VTIMEZONE entries.
+            # VTIMEZONEs are only distinguished by their TZID; if different
+            # timezones share the same TZID this produces erroneous output.
+            # VObject fails at this too.
+            for item in self.get_all():
+                depth = 0
+                for line in item.serialize().split("\r\n"):
+                    if line.startswith("BEGIN:"):
+                        depth += 1
+                    if depth == 1 and line == "BEGIN:VCALENDAR":
+                        in_vcalendar = True
+                    elif in_vcalendar:
+                        if depth == 1 and line.startswith("END:"):
+                            in_vcalendar = False
+                        if depth == 2 and line == "BEGIN:VTIMEZONE":
+                            vtimezone.append(line)
+                        elif vtimezone:
+                            vtimezone.append(line)
+                            if depth == 2 and line.startswith("TZID:"):
+                                tzid = line[len("TZID:"):]
+                            elif depth == 2 and line.startswith("END:"):
+                                if tzid is None or tzid not in included_tzids:
+                                    if vtimezones:
+                                        vtimezones += "\r\n"
+                                    vtimezones += "\r\n".join(vtimezone)
+                                    included_tzids.add(tzid)
+                                vtimezone.clear()
+                                tzid = None
+                        elif depth >= 2:
+                            if components:
+                                components += "\r\n"
+                            components += line
+                    if line.startswith("END:"):
+                        depth -= 1
+            return "\r\n".join(filter(bool, (
+                "BEGIN:VCALENDAR",
+                "VERSION:2.0",
+                "PRODID:-//PYVOBJECT//NONSGML Version 1//EN",
+                vtimezones,
+                components,
+                "END:VCALENDAR")))
         elif self.get_meta("tag") == "VADDRESSBOOK":
-            return "".join([item.serialize() for item in items])
+            return "".join((item.serialize() for item in self.get_all()))
         return ""
 
+    @property
+    def etag(self):
+        # reuse cached value if the storage is read-only
+        if self._writer or self._etag is None:
+            etag = md5()
+            for item in self.get_all():
+                etag.update((item.href + "/" + item.etag).encode("utf-8"))
+            self._etag = '"%s"' % etag.hexdigest()
+        return self._etag
+
     _lock = threading.Lock()
     _waiters = []
     _lock_file = None
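The collection etag above changes whenever any member's href or etag changes, and no item has to be parsed with vobject; conceptually:

    from hashlib import md5

    def collection_etag(items):
        # ``items`` yields objects with ``href`` and ``etag`` attributes.
        etag = md5()
        for item in items:
            etag.update((item.href + "/" + item.etag).encode("utf-8"))
        return '"%s"' % etag.hexdigest()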
--- a/radicale/xmlutils.py
+++ b/radicale/xmlutils.py
@@ -26,12 +26,14 @@ in them for XML requests (all but PUT).
 """
 
 import copy
+import math
 import posixpath
 import re
 import xml.etree.ElementTree as ET
 from collections import OrderedDict
-from datetime import datetime, timedelta, timezone
+from datetime import date, datetime, timedelta, timezone
 from http import client
 from itertools import chain
 from urllib.parse import quote, unquote, urlparse
 
 from . import storage
@@ -56,6 +58,13 @@ for short, url in NAMESPACES.items():
 CLARK_TAG_REGEX = re.compile(r"{(?P<namespace>[^}]*)}(?P<tag>.*)", re.VERBOSE)
 HUMAN_REGEX = re.compile(r"(?P<namespace>[^:{}]*)(?P<tag>.*)", re.VERBOSE)
 
+DAY = timedelta(days=1)
+SECOND = timedelta(seconds=1)
+DATETIME_MIN = datetime.min.replace(tzinfo=timezone.utc)
+DATETIME_MAX = datetime.max.replace(tzinfo=timezone.utc)
+TIMESTAMP_MIN = math.floor(DATETIME_MIN.timestamp())
+TIMESTAMP_MAX = math.ceil(DATETIME_MAX.timestamp())
+
 
 def pretty_xml(element, level=0):
     """Indent an ElementTree ``element`` and its children."""
@@ -210,11 +219,9 @@ def _prop_match(item, filter_):
 
 
 def _time_range_match(vobject_item, filter_, child_name):
-    """Check whether the ``item`` matches the time-range ``filter_``.
-
-    See rfc4791-9.9.
-
-    """
+    """Check whether the component/property ``child_name`` of
+    ``vobject_item`` matches the time-range ``filter_``."""
     start = filter_.get("start")
     end = filter_.get("end")
     if not start and not end:
@@ -229,14 +236,53 @@ def _time_range_match(vobject_item, filter_, child_name):
         end = datetime.max
     start = start.replace(tzinfo=timezone.utc)
     end = end.replace(tzinfo=timezone.utc)
-    child = getattr(vobject_item, child_name.lower())
+
+    matched = False
+
+    def range_fn(range_start, range_end):
+        nonlocal matched
+        if start < range_end and range_start < end:
+            matched = True
+            return True
+        if end < range_start:
+            return True
+        return False
+
+    def infinity_fn(start):
+        return False
+
+    _visit_time_ranges(vobject_item, child_name, range_fn, infinity_fn)
+    return matched
+
+
+def _visit_time_ranges(vobject_item, child_name, range_fn, infinity_fn):
+    """Visit all time ranges in the component/property ``child_name`` of
+    ``vobject_item`` with visitors ``range_fn`` and ``infinity_fn``.
+
+    ``range_fn`` gets called for every time_range with ``start`` and ``end``
+    datetimes as arguments. If the function returns True, the operation is
+    cancelled.
+
+    ``infinity_fn`` gets called when an infinite recurrence rule is detected
+    with ``start`` datetime as argument. If the function returns True, the
+    operation is cancelled.
+
+    See rfc4791-9.9.
+
+    """
+    child = getattr(vobject_item, child_name.lower())
     # Comments give the lines in the tables of the specification
     if child_name == "VEVENT":
         # TODO: check if there's a timezone
         dtstart = child.dtstart.value
 
         if child.rruleset:
+            if (";UNTIL=" not in child.rrule.value and
+                    ";COUNT=" not in child.rrule.value):
+                for dtstart in child.getrruleset(addRDate=True):
+                    if infinity_fn(_date_to_datetime(dtstart)):
+                        return
+                    break
             dtstarts = child.getrruleset(addRDate=True)
         else:
             dtstarts = (dtstart,)
@@ -255,31 +301,30 @@ def _time_range_match(vobject_item, filter_, child_name):
             dtstart_is_datetime = isinstance(dtstart, datetime)
             dtstart = _date_to_datetime(dtstart)
 
-            if dtstart > end:
-                break
-
             if dtend is not None:
                 # Line 1
                 dtend = dtstart + timedelta(seconds=original_duration)
-                if start < dtend and end > dtstart:
-                    return True
+                if range_fn(dtstart, dtend):
+                    return
             elif duration is not None:
                 if original_duration is None:
                     original_duration = duration.seconds
                 if duration.seconds > 0:
                     # Line 2
-                    if start < dtstart + duration and end > dtstart:
-                        return True
-                elif start <= dtstart and end > dtstart:
+                    if range_fn(dtstart, dtstart + duration):
+                        return
+                else:
                     # Line 3
-                    return True
+                    if range_fn(dtstart, dtstart + SECOND):
+                        return
             elif dtstart_is_datetime:
                 # Line 4
-                if start <= dtstart and end > dtstart:
-                    return True
-            elif start < dtstart + timedelta(days=1) and end > dtstart:
+                if range_fn(dtstart, dtstart + SECOND):
+                    return
+            else:
                 # Line 5
-                return True
+                if range_fn(dtstart, dtstart + DAY):
+                    return
 
     elif child_name == "VTODO":
         dtstart = getattr(child, "dtstart", None)
@@ -305,6 +350,12 @@ def _time_range_match(vobject_item, filter_, child_name):
             created = _date_to_datetime(created.value)
 
         if child.rruleset:
+            if (";UNTIL=" not in child.rrule.value and
+                    ";COUNT=" not in child.rrule.value):
+                for reference_date in child.getrruleset(addRDate=True):
+                    if infinity_fn(_date_to_datetime(reference_date)):
+                        return
+                    break
             reference_dates = child.getrruleset(addRDate=True)
         else:
             if dtstart is not None:
@@ -317,47 +368,56 @@ def _time_range_match(vobject_item, filter_, child_name):
                 reference_dates = (created,)
             else:
                 # Line 8
-                return True
+                if range_fn(DATETIME_MIN, DATETIME_MAX):
+                    return
+                reference_dates = ()
 
         for reference_date in reference_dates:
             reference_date = _date_to_datetime(reference_date)
-            if reference_date > end:
-                break
 
             if dtstart is not None and duration is not None:
                 # Line 1
-                if start <= reference_date + duration and (
-                        end > reference_date or
-                        end >= reference_date + duration):
-                    return True
+                if range_fn(reference_date,
+                            reference_date + duration + SECOND):
+                    return
+                if range_fn(reference_date + duration - SECOND,
+                            reference_date + duration + SECOND):
+                    return
             elif dtstart is not None and due is not None:
                 # Line 2
                 due = reference_date + timedelta(seconds=original_duration)
-                if (start < due or start <= reference_date) and (
-                        end > reference_date or end >= due):
-                    return True
+                if (range_fn(reference_date, due) or
+                        range_fn(reference_date, reference_date + SECOND) or
+                        range_fn(due - SECOND, due) or
+                        range_fn(due - SECOND, reference_date + SECOND)):
+                    return
             elif dtstart is not None:
-                if start <= reference_date and end > reference_date:
-                    return True
+                if range_fn(reference_date, reference_date + SECOND):
+                    return
             elif due is not None:
                 # Line 4
-                if start < reference_date and end >= reference_date:
-                    return True
+                if range_fn(reference_date - SECOND, reference_date):
+                    return
             elif completed is not None and created is not None:
                 # Line 5
                 completed = reference_date + timedelta(
                     seconds=original_duration)
-                if (start <= reference_date or start <= completed) and (
-                        end >= reference_date or end >= completed):
-                    return True
+                if (range_fn(reference_date - SECOND,
+                             reference_date + SECOND) or
+                        range_fn(completed - SECOND, completed + SECOND) or
+                        range_fn(reference_date - SECOND,
+                                 reference_date + SECOND) or
+                        range_fn(completed - SECOND, completed + SECOND)):
+                    return
             elif completed is not None:
                 # Line 6
-                if start <= reference_date and end >= reference_date:
-                    return True
+                if range_fn(reference_date - SECOND,
+                            reference_date + SECOND):
+                    return
             elif created is not None:
                 # Line 7
-                if end > reference_date:
-                    return True
+                if range_fn(reference_date, DATETIME_MAX):
+                    return
 
     elif child_name == "VJOURNAL":
         dtstart = getattr(child, "dtstart", None)
@@ -365,6 +425,12 @@ def _time_range_match(vobject_item, filter_, child_name):
         if dtstart is not None:
             dtstart = dtstart.value
             if child.rruleset:
+                if (";UNTIL=" not in child.rrule.value and
+                        ";COUNT=" not in child.rrule.value):
+                    for dtstart in child.getrruleset(addRDate=True):
+                        if infinity_fn(_date_to_datetime(dtstart)):
+                            return
+                        break
                 dtstarts = child.getrruleset(addRDate=True)
             else:
                 dtstarts = (dtstart,)
@@ -373,18 +439,21 @@ def _time_range_match(vobject_item, filter_, child_name):
                 dtstart_is_datetime = isinstance(dtstart, datetime)
                 dtstart = _date_to_datetime(dtstart)
 
-                if dtstart > end:
-                    break
-
                 if dtstart_is_datetime:
                     # Line 1
-                    if start <= dtstart and end > dtstart:
-                        return True
-                elif start < dtstart + timedelta(days=1) and end > dtstart:
+                    if range_fn(dtstart, dtstart + SECOND):
+                        return
+                else:
                     # Line 2
-                    return True
+                    if range_fn(dtstart, dtstart + DAY):
+                        return
 
-    return False
+    elif isinstance(child, date):
+        if range_fn(child, child + DAY):
+            return
+    elif isinstance(child, datetime):
+        if range_fn(child, child + SECOND):
+            return
 
 
 def _text_match(vobject_item, filter_, child_name, attrib_name=None):
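Separating the RFC 4791 time-range tables from their callers via visitors means other consumers can reuse the traversal. For example, a hypothetical caller could collect every occurrence interval instead of stopping at the first overlap (assuming the module's ``_visit_time_ranges`` above):

    def collect_ranges(vobject_item):
        ranges = []

        def range_fn(range_start, range_end):
            ranges.append((range_start, range_end))
            return False  # never cancel: visit every range

        def infinity_fn(start):
            return True  # infinite recurrence: stop expanding occurrences

        _visit_time_ranges(vobject_item, "VEVENT", range_fn, infinity_fn)
        return ranges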
@@ -429,6 +498,99 @@ def _param_filter_match(vobject_item, filter_, parent_name):
     return condition
 
 
+def simplify_prefilters(filters):
+    """Creates a simplified condition from ``filters``.
+
+    Returns a tuple (``tag``, ``start``, ``end``, ``simple``) where ``tag`` is
+    a string or None (match all) and ``start`` and ``end`` are POSIX
+    timestamps (as int). ``simple`` is a bool that indicates that ``filters``
+    and the simplified condition are identical.
+
+    """
+    flat_filters = tuple(chain.from_iterable(filters))
+    simple = len(flat_filters) <= 1
+    for col_filter in flat_filters:
+        if (col_filter.tag != _tag("C", "comp-filter") or
+                col_filter.get("name") != "VCALENDAR"):
+            simple = False
+            continue
+        simple &= len(col_filter) <= 1
+        for comp_filter in col_filter:
+            if comp_filter.tag != _tag("C", "comp-filter"):
+                simple = False
+                continue
+            tag = comp_filter.get("name")
+            if (tag not in ("VTODO", "VEVENT", "VJOURNAL") or comp_filter.find(
+                    _tag("C", "is-not-defined")) is not None):
+                simple = False
+                continue
+            simple &= len(comp_filter) <= 1
+            for time_filter in comp_filter:
+                if time_filter.tag != _tag("C", "time-range"):
+                    simple = False
+                    continue
+                start = time_filter.get("start")
+                end = time_filter.get("end")
+                if start:
+                    start = math.floor(datetime.strptime(
+                        start, "%Y%m%dT%H%M%SZ").replace(
+                            tzinfo=timezone.utc).timestamp())
+                else:
+                    start = TIMESTAMP_MIN
+                if end:
+                    end = math.ceil(datetime.strptime(
+                        end, "%Y%m%dT%H%M%SZ").replace(
+                            tzinfo=timezone.utc).timestamp())
+                else:
+                    end = TIMESTAMP_MAX
+                return tag, start, end, simple
+            return tag, TIMESTAMP_MIN, TIMESTAMP_MAX, simple
+    return None, TIMESTAMP_MIN, TIMESTAMP_MAX, simple
+
+
+def find_tag_and_time_range(vobject_item):
+    """Find tag and enclosing time range from ``vobject_item``.
+
+    Returns a tuple (``tag``, ``start``, ``end``) where ``tag`` is a string
+    and ``start`` and ``end`` are POSIX timestamps (as int).
+
+    This is intended to be used for matching against simplified prefilters.
+
+    """
+    tag = ""
+    if vobject_item.name == "VCALENDAR":
+        for component in vobject_item.components():
+            if component.name in ("VTODO", "VEVENT", "VJOURNAL"):
+                tag = component.name
+                break
+    if not tag:
+        return (None, math.floor(DATETIME_MIN.timestamp()),
+                math.ceil(DATETIME_MAX.timestamp()))
+    start = end = None
+
+    def range_fn(range_start, range_end):
+        nonlocal start, end
+        if start is None or range_start < start:
+            start = range_start
+        if end is None or end < range_end:
+            end = range_end
+        return False
+
+    def infinity_fn(range_start):
+        nonlocal start, end
+        if start is None or range_start < start:
+            start = range_start
+        end = DATETIME_MAX
+        return True
+
+    _visit_time_ranges(vobject_item, tag, range_fn, infinity_fn)
+    if start is None:
+        start = DATETIME_MIN
+    if end is None:
+        end = DATETIME_MAX
+    return tag, math.floor(start.timestamp()), math.ceil(end.timestamp())
+
+
 def name_from_path(path, collection):
     """Return Radicale item name from ``path``."""
     path = path.strip("/") + "/"
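For a concrete picture of the input, a CalDAV filter for VEVENTs in a UTC time window reduces to one tag and two timestamps, with ``simple`` True because the XML expresses nothing else. A sketch (assuming the module's ``simplify_prefilters`` and the CalDAV namespace used by its ``_tag`` helper):

    import xml.etree.ElementTree as ET

    CALDAV_NS = "urn:ietf:params:xml:ns:caldav"

    def _el(name, **attrib):
        return ET.Element("{%s}%s" % (CALDAV_NS, name), attrib)

    vcalendar = _el("comp-filter", name="VCALENDAR")
    vevent = _el("comp-filter", name="VEVENT")
    vevent.append(_el("time-range", start="20170601T000000Z",
                      end="20170701T000000Z"))
    vcalendar.append(vevent)

    tag, start, end, simple = simplify_prefilters([[vcalendar]])
    # tag == "VEVENT"; start/end are the POSIX timestamps of the window;
    # simple is True, so filtering with vobject can be skipped entirely.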
@@ -891,43 +1053,60 @@ def report(base_prefix, path, xml_request, collection):
             root.findall("./%s" % _tag("C", "filter")) +
             root.findall("./%s" % _tag("CR", "filter")))
 
+    def retrieve_items(collection, hreferences, multistatus):
+        """Retrieves all items that are referenced in ``hreferences`` from
+        ``collection`` and adds 404 responses for missing and invalid items
+        to ``multistatus``."""
+        collection_requested = False
+
+        def get_names():
+            """Extracts all names from references in ``hreferences`` and adds
+            404 responses for invalid references to ``multistatus``.
+            If the whole collection is referenced ``collection_requested``
+            gets set to ``True``."""
+            nonlocal collection_requested
+            for hreference in hreferences:
+                try:
+                    name = name_from_path(hreference, collection)
+                except ValueError as e:
-            collection.logger.warning("Skipping invalid path %r in REPORT "
-                                      "request on %r: %s", hreference, path, e)
+                    collection.logger.warning(
+                        "Skipping invalid path %r in REPORT request on %r: %s",
+                        hreference, path, e)
+                    response = _item_response(base_prefix, hreference,
+                                              found_item=False)
+                    multistatus.append(response)
+                    continue
+                if name:
+                    # Reference is an item
-            item = collection.get(name)
-            if not item:
-                response = _item_response(base_prefix, hreference,
-                                          found_item=False)
-                multistatus.append(response)
-                continue
-            items = [item]
+                    yield name
+                else:
+                    # Reference is a collection
-            items = collection.pre_filtered_list(filters)
+                    collection_requested = True
 
-        for item in items:
-            if filters:
-                try:
-                    match = (_comp_match
-                             if collection.get_meta("tag") == "VCALENDAR"
-                             else _prop_match)
+        for name, item in collection.get_multi2(get_names()):
+            if not item:
+                uri = "/" + posixpath.join(collection.path, name)
+                response = _item_response(base_prefix, uri,
+                                          found_item=False)
+                multistatus.append(response)
+            else:
+                yield item, False
+        if collection_requested:
+            yield from collection.get_all_filtered(filters)
+
+    for item, filters_matched in retrieve_items(collection, hreferences,
+                                                multistatus):
+        if filters and not filters_matched:
+            match = (
+                _comp_match if collection.get_meta("tag") == "VCALENDAR"
+                else _prop_match)
+            try:
                 if not all(match(item, filter_[0]) for filter_ in filters
                            if filter_):
                     continue
             except Exception as e:
                 raise RuntimeError("Failed to filter item %r from %r: %s" %
-                                   (collection.path, item.href, e)) from e
+                                   (item.href, collection.path, e)) from e
 
         found_props = []
         not_found_props = []