Fix errors loading local crawler config

Apparently getint and getfloat may return None even though this is not
mentioned in their type annotations.
This commit is contained in:
Joscha 2021-05-15 13:32:13 +02:00
parent acd674f0a0
commit 302b8c0c34
2 changed files with 19 additions and 21 deletions

View File

@ -115,9 +115,9 @@ crawler simulate a slower, network-based crawler.
- `path`: Path to the local directory to crawl. (Required) - `path`: Path to the local directory to crawl. (Required)
- `crawl_delay`: Maximum artificial delay (in seconds) to simulate for crawl - `crawl_delay`: Maximum artificial delay (in seconds) to simulate for crawl
requests. (Optional) requests. (Default: 0.0)
- `download_delay`: Maximum artificial delay (in seconds) to simulate for - `download_delay`: Maximum artificial delay (in seconds) to simulate for
download requests. (Optional) download requests. (Default: 0.0)
- `download_speed`: Download speed (in bytes per second) to simulate. (Optional) - `download_speed`: Download speed (in bytes per second) to simulate. (Optional)
## Authenticator types ## Authenticator types

View File

@ -16,23 +16,23 @@ class LocalCrawlerSection(CrawlerSection):
self.missing_value("path") self.missing_value("path")
return Path(value).expanduser() return Path(value).expanduser()
def crawl_delay(self) -> Optional[float]: def crawl_delay(self) -> float:
value = self.s.getfloat("crawl_delay") value = self.s.getfloat("crawl_delay", fallback=0.0)
if value <= 0: if value < 0:
self.invalid_value("crawl_delay", value, self.invalid_value("crawl_delay", value,
"Must be greater than 0") "Must not be negative")
return value return value
def download_delay(self) -> Optional[float]: def download_delay(self) -> float:
value = self.s.getfloat("download_delay") value = self.s.getfloat("download_delay", fallback=0.0)
if value <= 0: if value < 0:
self.invalid_value("download_delay", value, self.invalid_value("download_delay", value,
"Must be greater than 0") "Must not be negative")
return value return value
def download_speed(self) -> Optional[int]: def download_speed(self) -> Optional[int]:
value = self.s.getint("download_speed") value = self.s.getint("download_speed")
if value <= 0: if value is not None and value <= 0:
self.invalid_value("download_speed", value, self.invalid_value("download_speed", value,
"Must be greater than 0") "Must be greater than 0")
return value return value
@ -74,11 +74,10 @@ class LocalCrawler(Crawler):
tasks = [] tasks = []
async with self.crawl_bar(pure): async with self.crawl_bar(pure):
if self._crawl_delay: await asyncio.sleep(random.uniform(
await asyncio.sleep(random.uniform( 0.5 * self._crawl_delay,
0.5 * self._crawl_delay, self._crawl_delay,
self._crawl_delay, ))
))
for child in path.iterdir(): for child in path.iterdir():
pure_child = pure / child.name pure_child = pure / child.name
@ -94,11 +93,10 @@ class LocalCrawler(Crawler):
return return
async with self.download_bar(path) as bar: async with self.download_bar(path) as bar:
if self._download_delay: await asyncio.sleep(random.uniform(
await asyncio.sleep(random.uniform( 0.5 * self._download_delay,
0.5 * self._download_delay, self._download_delay,
self._download_delay, ))
))
bar.set_total(stat.st_size) bar.set_total(stat.st_size)