From b70b62cef542b282c69071b5cf963ed91ead2b65 Mon Sep 17 00:00:00 2001
From: Joscha
Date: Sat, 15 May 2021 17:23:33 +0200
Subject: [PATCH] Make crawler sections start with "crawl:"

Also, use only the part of the section name after the "crawl:" as the
crawler's output directory. Now, the implementation matches the
documentation again
---
 CONFIG.md        | 2 +-
 PFERD/config.py  | 2 +-
 PFERD/crawler.py | 3 +++
 3 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/CONFIG.md b/CONFIG.md
index 22078ae..11c4282 100644
--- a/CONFIG.md
+++ b/CONFIG.md
@@ -40,7 +40,7 @@ crawlers:
 - `type`: The types are specified in [this section](#crawler-types).
 - `output_dir`: The directory the crawler synchronizes files to. A crawler will
-  never place any files outside of this directory. (Default: crawler's name)
+  never place any files outside of this directory. (Default: the crawler's name)
 - `redownload`: When to download again a file that is already present locally.
   (Default: `never-smart`)
 - `never`: If a file is present locally, it is not downloaded again.
diff --git a/PFERD/config.py b/PFERD/config.py
index 66b882e..7a7e832 100644
--- a/PFERD/config.py
+++ b/PFERD/config.py
@@ -140,7 +140,7 @@ class Config:
     def crawler_sections(self) -> List[Tuple[str, SectionProxy]]:
         result = []
         for name, proxy in self._parser.items():
-            if name.startswith("crawler:"):
+            if name.startswith("crawl:"):
                 result.append((name, proxy))
         return result
 
diff --git a/PFERD/crawler.py b/PFERD/crawler.py
index f49eba8..4148614 100644
--- a/PFERD/crawler.py
+++ b/PFERD/crawler.py
@@ -110,6 +110,9 @@ def arepeat(attempts: int) -> Callable[[AWrapped], AWrapped]:
 
 class CrawlerSection(Section):
     def output_dir(self, name: str) -> Path:
+        # TODO Use removeprefix() after switching to 3.9
+        if name.startswith("crawl:"):
+            name = name[len("crawl:"):]
         return Path(self.s.get("output_dir", name)).expanduser()
 
     def redownload(self) -> Redownload: