mirror of
https://github.com/Garmelon/PFERD.git
synced 2023-12-21 10:23:01 +01:00
Display reason for ignoring an element in ilias crawler
This commit is contained in:
parent
dc964a9d98
commit
13bc78c889
@ -162,7 +162,7 @@ class IliasCrawler:
|
|||||||
element_path = Path(parent_path, link_element.getText().strip())
|
element_path = Path(parent_path, link_element.getText().strip())
|
||||||
|
|
||||||
if not self.dir_filter(element_path):
|
if not self.dir_filter(element_path):
|
||||||
PRETTY.filtered_path(element_path)
|
PRETTY.filtered_path(element_path, "user filter")
|
||||||
return []
|
return []
|
||||||
|
|
||||||
LOGGER.info("Searching %r", str(element_path))
|
LOGGER.info("Searching %r", str(element_path))
|
||||||
@ -190,6 +190,7 @@ class IliasCrawler:
|
|||||||
# A forum
|
# A forum
|
||||||
if str(img_tag["src"]).endswith("frm.svg"):
|
if str(img_tag["src"]).endswith("frm.svg"):
|
||||||
LOGGER.debug("Skipping forum at %r", url)
|
LOGGER.debug("Skipping forum at %r", url)
|
||||||
|
PRETTY.filtered_path(element_path, "forum")
|
||||||
return []
|
return []
|
||||||
|
|
||||||
# An exercise
|
# An exercise
|
||||||
@ -197,6 +198,11 @@ class IliasCrawler:
|
|||||||
LOGGER.debug("Crawling exercises at %r", url)
|
LOGGER.debug("Crawling exercises at %r", url)
|
||||||
return self._crawl_exercises(element_path, url)
|
return self._crawl_exercises(element_path, url)
|
||||||
|
|
||||||
|
if str(img_tag["src"]).endswith("icon_webr.svg"):
|
||||||
|
LOGGER.debug("Skipping external link at %r", url)
|
||||||
|
PRETTY.filtered_path(element_path, "external link")
|
||||||
|
return []
|
||||||
|
|
||||||
# Match the opencast video plugin
|
# Match the opencast video plugin
|
||||||
if "opencast" in str(img_tag["alt"]).lower():
|
if "opencast" in str(img_tag["alt"]).lower():
|
||||||
LOGGER.debug("Found video site: %r", url)
|
LOGGER.debug("Found video site: %r", url)
|
||||||
|
@ -145,13 +145,15 @@ class PrettyLogger:
|
|||||||
|
|
||||||
self.logger.info(f"{Style.DIM}Ignored {str(file_name)!r}.{Style.RESET_ALL}")
|
self.logger.info(f"{Style.DIM}Ignored {str(file_name)!r}.{Style.RESET_ALL}")
|
||||||
|
|
||||||
def filtered_path(self, path: Path) -> None:
|
def filtered_path(self, path: Path, reason: str) -> None:
|
||||||
"""
|
"""
|
||||||
A crawler filter rejected the given path.
|
A crawler filter rejected the given path.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
self.logger.info(
|
self.logger.info(
|
||||||
f"{Style.DIM}Not considering {str(path)!r} due to filter rules.{Style.RESET_ALL}"
|
f"{Style.DIM}Not considering {str(path)!r} due to filter rules"
|
||||||
|
f" ({Style.NORMAL}{reason}{Style.DIM})."
|
||||||
|
f"{Style.RESET_ALL}"
|
||||||
)
|
)
|
||||||
|
|
||||||
def starting_synchronizer(
|
def starting_synchronizer(
|
||||||
|
Loading…
Reference in New Issue
Block a user