Overhaul config and CLI option names

This commit is contained in:
Joscha
2021-05-25 14:12:19 +02:00
parent eb8b915813
commit 61430c8739
5 changed files with 154 additions and 129 deletions

View File

@ -169,33 +169,33 @@ class CrawlerSection(Section):
# Return the value of the "transform" option of this section, or "" when the
# option is not set.
def transform(self) -> str:
return self.s.get("transform", "")
# Diff hunk with old and new lines interleaved (the +/- markers are missing in
# this rendering; presumably each old line is followed by its replacement).
# The getter max_concurrent_tasks() is renamed to tasks() and now reads the
# "tasks" option instead of "max_concurrent_tasks".
# Both versions: read the option as an int with a fallback of 1, report any
# value <= 0 through self.invalid_value(), then return the value.
def max_concurrent_tasks(self) -> int:
value = self.s.getint("max_concurrent_tasks", fallback=1)
def tasks(self) -> int:
value = self.s.getint("tasks", fallback=1)
if value <= 0:
# The two-line call is the old form; the single-line call after it is the new
# form using the renamed option in the error report.
self.invalid_value("max_concurrent_tasks", value,
"Must be greater than 0")
self.invalid_value("tasks", value, "Must be greater than 0")
# NOTE(review): reached after invalid_value() only if that helper returns;
# presumably it raises - confirm against Section.invalid_value.
return value
# Diff hunk with old and new lines interleaved (the +/- markers are missing in
# this rendering; presumably each old line is followed by its replacement).
# The getter max_concurrent_downloads() is renamed to downloads() and now reads
# the "downloads" option; it calls the renamed tasks() getter for its bound.
# Both versions: when the option is unset, return the task limit; otherwise
# report values <= 0 or values above the task limit through
# self.invalid_value(), then return the value.
def max_concurrent_downloads(self) -> int:
tasks = self.max_concurrent_tasks()
value = self.s.getint("max_concurrent_downloads", fallback=None)
def downloads(self) -> int:
tasks = self.tasks()
value = self.s.getint("downloads", fallback=None)
# fallback=None makes an unset option distinguishable from any integer, so the
# download limit can default to the task limit.
if value is None:
return tasks
if value <= 0:
self.invalid_value("max_concurrent_downloads", value,
"Must be greater than 0")
self.invalid_value("downloads", value, "Must be greater than 0")
# The download limit may equal the task limit but must not exceed it.
if value > tasks:
self.invalid_value("max_concurrent_downloads", value,
"Must not be greater than max_concurrent_tasks")
self.invalid_value("downloads", value, "Must not be greater than tasks")
return value
# Diff hunk with old and new lines interleaved (the +/- markers are missing in
# this rendering; presumably each old line is followed by its replacement).
# The getter delay_between_tasks() is renamed to task_delay() and now reads the
# "task_delay" option instead of "delay_between_tasks".
# Both versions: read the option as a float with a fallback of 0.0, report
# negative values through self.invalid_value(), then return the value.
# NOTE(review): the unit of the delay is not visible here - presumably seconds.
def delay_between_tasks(self) -> float:
value = self.s.getfloat("delay_between_tasks", fallback=0.0)
def task_delay(self) -> float:
value = self.s.getfloat("task_delay", fallback=0.0)
# Zero is allowed; only values below zero are rejected.
if value < 0:
self.invalid_value("delay_between_tasks", value,
"Must not be negative")
self.invalid_value("task_delay", value, "Must not be negative")
return value
# Return the "windows_paths" option as a bool. When the option is not set, the
# fallback is True exactly when os.name == "nt".
# In this diff the method appears here, before auth(), and again with the same
# body after auth(); presumably this is its new position.
def windows_paths(self) -> bool:
on_windows = os.name == "nt"
return self.s.getboolean("windows_paths", fallback=on_windows)
def auth(self, authenticators: Dict[str, Authenticator]) -> Authenticator:
value = self.s.get("auth")
if value is None:
@ -205,10 +205,6 @@ class CrawlerSection(Section):
self.invalid_value("auth", value, "No such auth section exists")
return auth
# Return the "windows_paths" option as a bool. When the option is not set, the
# fallback is True exactly when os.name == "nt".
# This copy sits in a hunk that shrinks by four lines (-205,10 +205,6), so it
# is presumably the removed side of a move; an identical body appears earlier
# in the diff, before auth().
def windows_paths(self) -> bool:
on_windows = os.name == "nt"
return self.s.getboolean("windows_paths", fallback=on_windows)
class Crawler(ABC):
def __init__(
@ -230,9 +226,9 @@ class Crawler(ABC):
self.error_free = True
self._limiter = Limiter(
task_limit=section.max_concurrent_tasks(),
download_limit=section.max_concurrent_downloads(),
task_delay=section.delay_between_tasks(),
task_limit=section.tasks(),
download_limit=section.downloads(),
task_delay=section.task_delay(),
)
self._deduplicator = Deduplicator(section.windows_paths())

View File

@ -40,18 +40,14 @@ class KitIliasWebCrawlerSection(HttpCrawlerSection):
self.invalid_value("target", target, "Should be <course id | desktop | kit ilias URL>")
# Resolve the optional "tfa_auth" option to an Authenticator from the given
# mapping, or return None when no authenticator is configured.
# Diff hunk with old and new lines interleaved (the +/- markers are missing in
# this rendering; presumably each old line is followed by its replacement).
def tfa_auth(self, authenticators: Dict[str, Authenticator]) -> Optional[Authenticator]:
# Old form: any falsy value (None or "") means "not configured".
value = self.s.get("tfa_auth")
if not value:
# New form: only a missing option (None) means "not configured"; an empty
# string now proceeds to the lookup below.
value: Optional[str] = self.s.get("tfa_auth")
if value is None:
return None
# Old lookup prefixed the value with "auth:"; the new lookup uses the value
# unchanged as the key. NOTE(review): presumably the option must now hold the
# full section name - confirm against how `authenticators` is keyed.
auth = authenticators.get(f"auth:{value}")
auth = authenticators.get(value)
if auth is None:
# The new call reports the failure under the option actually read,
# "tfa_auth", instead of "auth".
self.invalid_value("auth", value, "No such auth section exists")
self.invalid_value("tfa_auth", value, "No such auth section exists")
return auth
# Return the "link_file_redirect_delay" option as an int, or -1 when it is not
# set. How callers interpret -1 is not visible in this block.
# Presumably the removed side of a rename: a getter reading
# "link_redirect_delay" with the same fallback appears later in this diff.
def link_file_redirect_delay(self) -> int:
return self.s.getint("link_file_redirect_delay", fallback=-1)
def links(self) -> Links:
type_str: Optional[str] = self.s.get("links")
@ -63,6 +59,9 @@ class KitIliasWebCrawlerSection(HttpCrawlerSection):
except ValueError as e:
self.invalid_value("links", type_str, str(e).capitalize())
# Return the "link_redirect_delay" option as an int, or -1 when it is not set.
# How callers interpret -1 is not visible in this block.
def link_redirect_delay(self) -> int:
return self.s.getint("link_redirect_delay", fallback=-1)
# Return the "videos" option as a bool, defaulting to False when it is not set.
def videos(self) -> bool:
return self.s.getboolean("videos", fallback=False)
@ -173,7 +172,7 @@ class KitIliasWebCrawler(HttpCrawler):
self._base_url = "https://ilias.studium.kit.edu"
self._target = section.target()
self._link_file_redirect_delay = section.link_file_redirect_delay()
self._link_file_redirect_delay = section.link_redirect_delay()
self._links = section.links()
self._videos = section.videos()