mirror of
https://github.com/Garmelon/PFERD.git
synced 2023-12-21 10:23:01 +01:00
Print report even if exiting due to Ctrl+C
This commit is contained in:
parent
980578d05a
commit
edb52a989e
@ -107,15 +107,22 @@ def main() -> None:
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
pferd = Pferd(config, args.crawler)
|
pferd = Pferd(config, args.crawler)
|
||||||
asyncio.run(pferd.run())
|
except PferdLoadError as e:
|
||||||
except (PferdLoadError, ConfigOptionError) as e:
|
|
||||||
log.unlock()
|
log.unlock()
|
||||||
log.error(str(e))
|
log.error(str(e))
|
||||||
exit(1)
|
exit(1)
|
||||||
|
|
||||||
|
error = False
|
||||||
|
try:
|
||||||
|
asyncio.run(pferd.run())
|
||||||
|
except ConfigOptionError as e:
|
||||||
|
log.unlock()
|
||||||
|
log.error(str(e))
|
||||||
|
error = True
|
||||||
except RuleParseError as e:
|
except RuleParseError as e:
|
||||||
log.unlock()
|
log.unlock()
|
||||||
e.pretty_print()
|
e.pretty_print()
|
||||||
exit(1)
|
error = True
|
||||||
except KeyboardInterrupt:
|
except KeyboardInterrupt:
|
||||||
log.unlock()
|
log.unlock()
|
||||||
log.explain_topic("Interrupted, exiting immediately")
|
log.explain_topic("Interrupted, exiting immediately")
|
||||||
@ -123,9 +130,14 @@ def main() -> None:
|
|||||||
log.explain("Temporary files are not cleaned up")
|
log.explain("Temporary files are not cleaned up")
|
||||||
# TODO Clean up tmp files
|
# TODO Clean up tmp files
|
||||||
# And when those files *do* actually get cleaned up properly,
|
# And when those files *do* actually get cleaned up properly,
|
||||||
# reconsider what exit code to use here.
|
# reconsider if this should be an error
|
||||||
exit(1)
|
error = True
|
||||||
except Exception:
|
except Exception:
|
||||||
log.unlock()
|
log.unlock()
|
||||||
log.unexpected_exception()
|
log.unexpected_exception()
|
||||||
|
error = True
|
||||||
|
|
||||||
|
pferd.print_report()
|
||||||
|
|
||||||
|
if error:
|
||||||
exit(1)
|
exit(1)
|
||||||
|
@ -170,6 +170,7 @@ class Config:
|
|||||||
def dump_to_stdout(self) -> None:
|
def dump_to_stdout(self) -> None:
|
||||||
self._parser.write(sys.stdout)
|
self._parser.write(sys.stdout)
|
||||||
|
|
||||||
|
# TODO Rename to "crawl_sections"
|
||||||
def crawler_sections(self) -> List[Tuple[str, SectionProxy]]:
|
def crawler_sections(self) -> List[Tuple[str, SectionProxy]]:
|
||||||
result = []
|
result = []
|
||||||
for name, proxy in self._parser.items():
|
for name, proxy in self._parser.items():
|
||||||
@ -178,6 +179,7 @@ class Config:
|
|||||||
|
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
# TODO Rename to "auth_sections"
|
||||||
def authenticator_sections(self) -> List[Tuple[str, SectionProxy]]:
|
def authenticator_sections(self) -> List[Tuple[str, SectionProxy]]:
|
||||||
result = []
|
result = []
|
||||||
for name, proxy in self._parser.items():
|
for name, proxy in self._parser.items():
|
||||||
|
@ -15,20 +15,53 @@ class PferdLoadError(Exception):
|
|||||||
|
|
||||||
|
|
||||||
class Pferd:
|
class Pferd:
|
||||||
def __init__(self, config: Config, crawlers_to_run: Optional[List[str]]):
|
def __init__(self, config: Config, cli_crawlers: Optional[List[str]]):
|
||||||
"""
|
"""
|
||||||
May throw PferdLoadError.
|
May throw PferdLoadError.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
if crawlers_to_run is not None and len(crawlers_to_run) != len(set(crawlers_to_run)):
|
|
||||||
raise PferdLoadError("Some crawlers were selected multiple times")
|
|
||||||
|
|
||||||
self._config = config
|
self._config = config
|
||||||
self._crawlers_to_run = crawlers_to_run
|
self._crawlers_to_run = self._find_crawlers_to_run(config, cli_crawlers)
|
||||||
|
|
||||||
self._authenticators: Dict[str, Authenticator] = {}
|
self._authenticators: Dict[str, Authenticator] = {}
|
||||||
self._crawlers: Dict[str, Crawler] = {}
|
self._crawlers: Dict[str, Crawler] = {}
|
||||||
|
|
||||||
|
def _find_crawlers_to_run(self, config: Config, cli_crawlers: Optional[List[str]]) -> List[str]:
|
||||||
|
log.explain_topic("Deciding which crawlers to run")
|
||||||
|
crawl_sections = [name for name, _ in config.crawler_sections()]
|
||||||
|
|
||||||
|
if cli_crawlers is None:
|
||||||
|
log.explain("No crawlers specified on CLI")
|
||||||
|
log.explain("Running all crawlers specified in config")
|
||||||
|
return crawl_sections
|
||||||
|
|
||||||
|
if len(cli_crawlers) != len(set(cli_crawlers)):
|
||||||
|
raise PferdLoadError("Some crawlers were selected multiple times")
|
||||||
|
|
||||||
|
log.explain("Crawlers specified on CLI")
|
||||||
|
|
||||||
|
crawlers_to_run = [] # With crawl: prefix
|
||||||
|
unknown_names = [] # Without crawl: prefix
|
||||||
|
|
||||||
|
for name in cli_crawlers:
|
||||||
|
section_name = f"crawl:{name}"
|
||||||
|
if section_name in crawl_sections:
|
||||||
|
log.explain(f"Crawler section named {section_name!r} exists")
|
||||||
|
crawlers_to_run.append(section_name)
|
||||||
|
else:
|
||||||
|
log.explain(f"There's no crawler section named {section_name!r}")
|
||||||
|
unknown_names.append(name)
|
||||||
|
|
||||||
|
if unknown_names:
|
||||||
|
if len(unknown_names) == 1:
|
||||||
|
[name] = unknown_names
|
||||||
|
raise PferdLoadError(f"There is no crawler named {name!r}")
|
||||||
|
else:
|
||||||
|
names_str = ", ".join(repr(name) for name in unknown_names)
|
||||||
|
raise PferdLoadError(f"There are no crawlers named {names_str}")
|
||||||
|
|
||||||
|
return crawlers_to_run
|
||||||
|
|
||||||
def _load_authenticators(self) -> None:
|
def _load_authenticators(self) -> None:
|
||||||
for name, section in self._config.authenticator_sections():
|
for name, section in self._config.authenticator_sections():
|
||||||
log.print(f"[bold bright_cyan]Loading[/] {escape(name)}")
|
log.print(f"[bold bright_cyan]Loading[/] {escape(name)}")
|
||||||
@ -40,15 +73,12 @@ class Pferd:
|
|||||||
authenticator = authenticator_constructor(name, section, self._config)
|
authenticator = authenticator_constructor(name, section, self._config)
|
||||||
self._authenticators[name] = authenticator
|
self._authenticators[name] = authenticator
|
||||||
|
|
||||||
def _load_crawlers(self) -> List[str]:
|
def _load_crawlers(self) -> None:
|
||||||
names = []
|
|
||||||
|
|
||||||
# Cookie sharing
|
# Cookie sharing
|
||||||
kit_ilias_web_paths: Dict[Authenticator, List[Path]] = {}
|
kit_ilias_web_paths: Dict[Authenticator, List[Path]] = {}
|
||||||
|
|
||||||
for name, section in self._config.crawler_sections():
|
for name, section in self._config.crawler_sections():
|
||||||
log.print(f"[bold bright_cyan]Loading[/] {escape(name)}")
|
log.print(f"[bold bright_cyan]Loading[/] {escape(name)}")
|
||||||
names.append(name)
|
|
||||||
|
|
||||||
crawl_type = section.get("type")
|
crawl_type = section.get("type")
|
||||||
crawler_constructor = CRAWLERS.get(crawl_type)
|
crawler_constructor = CRAWLERS.get(crawl_type)
|
||||||
@ -62,55 +92,20 @@ class Pferd:
|
|||||||
if isinstance(crawler, KitIliasWebCrawler):
|
if isinstance(crawler, KitIliasWebCrawler):
|
||||||
crawler.share_cookies(kit_ilias_web_paths)
|
crawler.share_cookies(kit_ilias_web_paths)
|
||||||
|
|
||||||
return names
|
|
||||||
|
|
||||||
def _find_crawlers_to_run(self, loaded_crawlers: List[str]) -> List[str]:
|
|
||||||
log.explain_topic("Deciding which crawlers to run")
|
|
||||||
|
|
||||||
if self._crawlers_to_run is None:
|
|
||||||
log.explain("No crawlers specified on CLI")
|
|
||||||
log.explain("Running all loaded crawlers")
|
|
||||||
return loaded_crawlers
|
|
||||||
|
|
||||||
log.explain("Crawlers specified on CLI")
|
|
||||||
|
|
||||||
names: List[str] = [] # With 'crawl:' prefix
|
|
||||||
unknown_names = [] # Without 'crawl:' prefix
|
|
||||||
|
|
||||||
for name in self._crawlers_to_run:
|
|
||||||
section_name = f"crawl:{name}"
|
|
||||||
if section_name in self._crawlers:
|
|
||||||
log.explain(f"Crawler section named {section_name!r} exists")
|
|
||||||
names.append(section_name)
|
|
||||||
else:
|
|
||||||
log.explain(f"There's no crawler section named {section_name!r}")
|
|
||||||
unknown_names.append(name)
|
|
||||||
|
|
||||||
if unknown_names:
|
|
||||||
if len(unknown_names) == 1:
|
|
||||||
[name] = unknown_names
|
|
||||||
raise PferdLoadError(f"There is no crawler named {name!r}")
|
|
||||||
else:
|
|
||||||
names_str = ", ".join(repr(name) for name in unknown_names)
|
|
||||||
raise PferdLoadError(f"There are no crawlers named {names_str}")
|
|
||||||
|
|
||||||
return names
|
|
||||||
|
|
||||||
async def run(self) -> None:
|
async def run(self) -> None:
|
||||||
"""
|
"""
|
||||||
May throw PferdLoadError or ConfigOptionError.
|
May throw ConfigOptionError.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# These two functions must run inside the same event loop as the
|
# These two functions must run inside the same event loop as the
|
||||||
# crawlers, so that any new objects (like Conditions or Futures) can
|
# crawlers, so that any new objects (like Conditions or Futures) can
|
||||||
# obtain the correct event loop.
|
# obtain the correct event loop.
|
||||||
self._load_authenticators()
|
self._load_authenticators()
|
||||||
loaded_crawlers = self._load_crawlers()
|
self._load_crawlers()
|
||||||
names = self._find_crawlers_to_run(loaded_crawlers)
|
|
||||||
|
|
||||||
log.print("")
|
log.print("")
|
||||||
|
|
||||||
for name in names:
|
for name in self._crawlers_to_run:
|
||||||
crawler = self._crawlers[name]
|
crawler = self._crawlers[name]
|
||||||
|
|
||||||
log.print(f"[bold bright_cyan]Running[/] {escape(name)}")
|
log.print(f"[bold bright_cyan]Running[/] {escape(name)}")
|
||||||
@ -122,7 +117,8 @@ class Pferd:
|
|||||||
except Exception:
|
except Exception:
|
||||||
log.unexpected_exception()
|
log.unexpected_exception()
|
||||||
|
|
||||||
for name in names:
|
def print_report(self) -> None:
|
||||||
|
for name in self._crawlers_to_run:
|
||||||
crawler = self._crawlers[name]
|
crawler = self._crawlers[name]
|
||||||
|
|
||||||
log.report("")
|
log.report("")
|
||||||
|
Loading…
Reference in New Issue
Block a user