From 302b8c0c3466a51c29f919d519edf2b0ce8f40e8 Mon Sep 17 00:00:00 2001
From: Joscha
Date: Sat, 15 May 2021 13:32:13 +0200
Subject: [PATCH] Fix errors loading local crawler config

Apparently getint and getfloat may return a None even though this is not
mentioned in their type annotations.
---
 CONFIG.md               |  4 ++--
 PFERD/crawlers/local.py | 36 +++++++++++++++++-------------------
 2 files changed, 19 insertions(+), 21 deletions(-)

diff --git a/CONFIG.md b/CONFIG.md
index 2338d8f..dd38c11 100644
--- a/CONFIG.md
+++ b/CONFIG.md
@@ -115,9 +115,9 @@ crawler simulate a slower, network-based crawler.
 
 - `path`: Path to the local directory to crawl. (Required)
 - `crawl_delay`: Maximum artificial delay (in seconds) to simulate for crawl
-  requests. (Optional)
+  requests. (Default: 0.0)
 - `download_delay`: Maximum artificial delay (in seconds) to simulate for
-  download requests. (Optional)
+  download requests. (Default: 0.0)
 - `download_speed`: Download speed (in bytes per second) to simulate. (Optional)
 
 ## Authenticator types
diff --git a/PFERD/crawlers/local.py b/PFERD/crawlers/local.py
index 07e6133..99bc700 100644
--- a/PFERD/crawlers/local.py
+++ b/PFERD/crawlers/local.py
@@ -16,23 +16,23 @@ class LocalCrawlerSection(CrawlerSection):
             self.missing_value("path")
         return Path(value).expanduser()
 
-    def crawl_delay(self) -> Optional[float]:
-        value = self.s.getfloat("crawl_delay")
-        if value <= 0:
+    def crawl_delay(self) -> float:
+        value = self.s.getfloat("crawl_delay", fallback=0.0)
+        if value < 0:
             self.invalid_value("crawl_delay", value,
-                               "Must be greater than 0")
+                               "Must not be negative")
         return value
 
-    def download_delay(self) -> Optional[float]:
-        value = self.s.getfloat("download_delay")
-        if value <= 0:
+    def download_delay(self) -> float:
+        value = self.s.getfloat("download_delay", fallback=0.0)
+        if value < 0:
             self.invalid_value("download_delay", value,
-                               "Must be greater than 0")
+                               "Must not be negative")
         return value
 
     def download_speed(self) -> Optional[int]:
         value = self.s.getint("download_speed")
-        if value <= 0:
+        if value is not None and value <= 0:
             self.invalid_value("download_speed", value,
                                "Must be greater than 0")
         return value
@@ -74,11 +74,10 @@ class LocalCrawler(Crawler):
         tasks = []
 
         async with self.crawl_bar(pure):
-            if self._crawl_delay:
-                await asyncio.sleep(random.uniform(
-                    0.5 * self._crawl_delay,
-                    self._crawl_delay,
-                ))
+            await asyncio.sleep(random.uniform(
+                0.5 * self._crawl_delay,
+                self._crawl_delay,
+            ))
 
             for child in path.iterdir():
                 pure_child = pure / child.name
@@ -94,11 +93,10 @@ class LocalCrawler(Crawler):
             return
 
         async with self.download_bar(path) as bar:
-            if self._download_delay:
-                await asyncio.sleep(random.uniform(
-                    0.5 * self._download_delay,
-                    self._download_delay,
-                ))
+            await asyncio.sleep(random.uniform(
+                0.5 * self._download_delay,
+                self._download_delay,
+            ))
 
             bar.set_total(stat.st_size)
 