Compare commits

..

5 Commits

8 changed files with 33 additions and 70 deletions

View File

@ -22,9 +22,18 @@ ambiguous situations.
## Unreleased
## 3.4.2 - 2022-10-26
### Added
- Recognize and crawl content pages in cards
- Recognize and ignore surveys
### Fixed
- Forum crawling crashing when parsing empty (= 0 messages) threads
- Forum crawling crashing when a thread has no messages at all
- Forum crawling crashing when a forum has no threads at all
- Ilias login failing in some cases
- Crawling of paginated future meetings
- IPD crawler handling of URLs without trailing slash
## 3.4.1 - 2022-08-17

View File

@ -5,8 +5,6 @@ import os
import sys
from pathlib import Path
from PFERD.update import check_for_updates
from .auth import AuthLoadError
from .cli import PARSER, ParserLoadError, load_default_section
from .config import Config, ConfigDumpError, ConfigLoadError, ConfigOptionError
@ -136,11 +134,6 @@ def main() -> None:
loop.run_until_complete(asyncio.sleep(1))
loop.close()
else:
log.explain_topic("Checking for updates")
if not args.skip_update_check:
asyncio.run(check_for_updates())
else:
log.explain("Update check skipped due to configuration option")
asyncio.run(pferd.run(args.debug_transforms))
except (ConfigOptionError, AuthLoadError) as e:
log.unlock()

View File

@ -151,11 +151,6 @@ PARSER.add_argument(
action="version",
version=f"{NAME} {VERSION} (https://github.com/Garmelon/PFERD)",
)
PARSER.add_argument(
"--skip-update-check",
action="store_true",
help="disable automatic update checks at startup"
)
PARSER.add_argument(
"--config", "-c",
type=Path,

View File

@ -24,6 +24,7 @@ class IliasElementType(Enum):
LINK = "link"
BOOKING = "booking"
MEETING = "meeting"
SURVEY = "survey"
VIDEO = "video"
VIDEO_PLAYER = "video_player"
VIDEO_FOLDER = "video_folder"
@ -730,6 +731,10 @@ class IliasPage:
return IliasElementType.TEST
if "fold" in icon["class"]:
return IliasElementType.FOLDER
if "copa" in icon["class"]:
return IliasElementType.FOLDER
if "svy" in icon["class"]:
return IliasElementType.SURVEY
_unexpected_html_warning()
log.warn_contd(f"Could not extract type from {icon} for card title {card_title}")

View File

@ -377,9 +377,20 @@ instance's greatest bottleneck.
return None
return await self._handle_forum(element, element_path)
elif element.type == IliasElementType.TEST:
log.explain_topic(f"Decision: Crawl {fmt_path(element_path)}")
log.explain("Tests contain no relevant files")
log.explain("Answer: No")
log.status(
"[bold bright_black]",
"Ignored",
fmt_path(element_path),
"[bright_black](tests contain no relevant data)"
)
return None
elif element.type == IliasElementType.SURVEY:
log.status(
"[bold bright_black]",
"Ignored",
fmt_path(element_path),
"[bright_black](surveys contain no relevant data)"
)
return None
elif element.type == IliasElementType.LINK:
return await self._handle_link(element, element_path)

View File

@ -24,6 +24,9 @@ class KitIpdCrawlerSection(HttpCrawlerSection):
if not target.startswith("https://"):
self.invalid_value("target", target, "Should be a URL")
if not target.endswith("/"):
target = target + "/"
return target
def link_regex(self) -> Pattern[str]:

View File

@ -1,53 +0,0 @@
from dataclasses import dataclass
import ssl
from typing import Optional
import aiohttp
import certifi
from .version import NAME, VERSION
from .logging import log
@dataclass
class PferdUpdate:
release_url: str
version: str
def _build_session() -> aiohttp.ClientSession:
return aiohttp.ClientSession(
headers={"User-Agent": f"{NAME}/{VERSION}"},
connector=aiohttp.TCPConnector(ssl=ssl.create_default_context(cafile=certifi.where())),
timeout=aiohttp.ClientTimeout(
total=15 * 60,
connect=10,
sock_connect=10,
sock_read=10,
)
)
async def check_for_updates() -> None:
if new_version := await get_newer_version():
log.warn(
f"{NAME} version out of date. "
+ f"You are running version {VERSION!r} but {new_version.version!r} was found on GitHub."
)
log.warn_contd(f"You can download it on GitHub: {new_version.release_url}")
else:
log.explain("No update found")
async def get_newer_version() -> Optional[PferdUpdate]:
async with _build_session() as session:
async with session.get(
"https://api.github.com/repos/Garmelon/Pferd/releases/latest",
headers={"Accept": "application/vnd.github+json"}
) as response:
release_information = await response.json()
tag_name: str = release_information["tag_name"]
tag_name = tag_name.removeprefix("v")
if VERSION == tag_name:
return None
return PferdUpdate(release_url=release_information["html_url"], version=tag_name)

View File

@ -1,2 +1,2 @@
NAME = "PFERD"
VERSION = "3.4.1"
VERSION = "3.4.2"