Compare commits

...

19 Commits

Author SHA1 Message Date
I-Al-Istannen 533bc27439 Bump version to 3.5.0 2023-09-13 23:13:30 +02:00
I-Al-Istannen 0113a0ca10 Update flake.lock 2023-09-13 22:23:36 +02:00
I-Al-Istannen 40f8a05ad6 Add .idea to gitignore 2023-09-13 22:23:36 +02:00
I-Al-Istannen 50b50513c6 Ignore SCORM learning modules 2023-08-29 13:51:19 +02:00
I-Al-Istannen df3514cd03 Crawl paginated past meetings 2023-08-29 12:41:21 +02:00
I-Al-Istannen ad53185247 Sanitize ascii control characters on windows 2023-08-29 12:41:15 +02:00
I-Al-Istannen 87b67e9271 Crawl files in the info tab 2023-08-29 12:41:15 +02:00
I-Al-Istannen b54b3b979c Remove size suffix for content pages 2023-08-27 11:43:05 +02:00
I-Al-Istannen 2184ac8040 Add support for ILIAS mediacast listings 2023-08-27 11:43:05 +02:00
I-Al-Istannen b3d412360b Add Nix flake 2023-08-26 23:54:19 +02:00
Mr. Pine dbc2553b11 Add default `show-not-deleted` option
If set to `no`, PFERD won't print status or report messages for not deleted files
2023-08-26 18:43:01 +02:00
I-Al-Istannen 68c398f1fe Add support for ILIAS learning modules 2023-08-02 13:34:54 +02:00
I-Al-Istannen 123a57beec Fix mypy unreachable error in file_templates 2023-07-29 18:36:33 +02:00
I-Al-Istannen d204dac8ce Detect unexpected root page redirects and abort operation 2023-07-29 18:36:33 +02:00
Mr. Pine 443f7fe839 Add `no-delete-prompt-overwrite` crawler conflict resolution option (#75) 2023-07-29 18:36:33 +02:00
I-Al-Istannen 0294ceb7d5 Update github action versions 2023-03-22 00:10:54 +01:00
I-Al-Istannen 6f30c6583d Fix crawling of cards without descriptions 2023-03-21 23:52:33 +01:00
I-Al-Istannen 467fc526e8 Fix crawling of file/video cards 2023-03-21 23:52:24 +01:00
I-Al-Istannen 722d2eb393 Fix crawling of courses with preselected timeline tab 2023-03-21 23:36:47 +01:00
19 changed files with 664 additions and 88 deletions

View File

@ -17,9 +17,9 @@ jobs:
python: ["3.9"]
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v3
- uses: actions/setup-python@v2
- uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python }}
@ -45,7 +45,7 @@ jobs:
run: mv dist/pferd* dist/pferd-${{ matrix.os }}
- name: Upload binary
uses: actions/upload-artifact@v2
uses: actions/upload-artifact@v3
with:
name: Binaries
path: dist/pferd-${{ matrix.os }}
@ -57,7 +57,7 @@ jobs:
steps:
- name: Download binaries
uses: actions/download-artifact@v2
uses: actions/download-artifact@v3
with:
name: Binaries

1
.gitignore vendored
View File

@ -3,6 +3,7 @@
/PFERD.egg-info/
__pycache__/
/.vscode/
/.idea/
# pyinstaller
/pferd.spec

View File

@ -22,6 +22,29 @@ ambiguous situations.
## Unreleased
## 3.5.0 - 2023-09-13
### Added
- `no-delete-prompt-override` conflict resolution strategy
- Support for ILIAS learning modules
- `show_not_deleted` option to stop printing the "Not Deleted" status or report
message. This combines nicely with the `no-delete-prompt-override` strategy,
causing PFERD to mostly ignore local-only files.
- Support for mediacast video listings
- Crawling of files in info tab
### Changed
- Remove size suffix for files in content pages
### Fixed
- Crawling of courses with the timeline view as the default tab
- Crawling of file and custom opencast cards
- Crawling of button cards without descriptions
- Abort crawling when encountering an unexpected ilias root page redirect
- Sanitize ascii control characters on Windows
- Crawling of paginated past meetings
- Ignore SCORM learning modules
## 3.4.3 - 2022-11-29
### Added

View File

@ -26,6 +26,9 @@ default values for the other sections.
`Added ...`) while running a crawler. (Default: `yes`)
- `report`: Whether PFERD should print a report of added, changed and deleted
local files for all crawlers before exiting. (Default: `yes`)
- `show_not_deleted`: Whether PFERD should print messages in status and report
when a local-only file wasn't deleted. Combines nicely with the
`no-delete-prompt-override` conflict resolution strategy.
- `share_cookies`: Whether crawlers should share cookies where applicable. For
example, some crawlers share cookies if they crawl the same website using the
same account. (Default: `yes`)
@ -75,6 +78,9 @@ common to all crawlers:
using `prompt` and always choosing "yes".
- `no-delete`: Never delete local files, but overwrite local files if the
remote file is different.
- `no-delete-prompt-overwrite`: Never delete local files, but prompt to
overwrite local files if the remote file is different. Combines nicely
with the `show_not_deleted` option.
- `transform`: Rules for renaming and excluding certain files and directories.
For more details, see [this section](#transformation-rules). (Default: empty)
- `tasks`: The maximum number of concurrent tasks (such as crawling or

View File

@ -1,5 +1,6 @@
Copyright 2019-2021 Garmelon, I-Al-Istannen, danstooamerican, pavelzw,
TheChristophe, Scriptim, thelukasprobst, Toorero
TheChristophe, Scriptim, thelukasprobst, Toorero,
Mr-Pine
Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in

View File

@ -47,6 +47,8 @@ def configure_logging_from_args(args: argparse.Namespace) -> None:
log.output_explain = args.explain
if args.status is not None:
log.output_status = args.status
if args.show_not_deleted is not None:
log.output_not_deleted = args.show_not_deleted
if args.report is not None:
log.output_report = args.report
@ -72,6 +74,8 @@ def configure_logging_from_config(args: argparse.Namespace, config: Config) -> N
log.output_status = config.default_section.status()
if args.report is None:
log.output_report = config.default_section.report()
if args.show_not_deleted is None:
log.output_not_deleted = config.default_section.show_not_deleted()
except ConfigOptionError as e:
log.error(str(e))
sys.exit(1)

View File

@ -215,6 +215,11 @@ PARSER.add_argument(
action=BooleanOptionalAction,
help="whether crawlers should share cookies where applicable"
)
PARSER.add_argument(
"--show-not-deleted",
action=BooleanOptionalAction,
help="print messages in status and report when PFERD did not delete a local only file"
)
def load_default_section(
@ -233,6 +238,8 @@ def load_default_section(
section["report"] = "yes" if args.report else "no"
if args.share_cookies is not None:
section["share_cookies"] = "yes" if args.share_cookies else "no"
if args.show_not_deleted is not None:
section["show_not_deleted"] = "yes" if args.show_not_deleted else "no"
SUBPARSERS = PARSER.add_subparsers(title="crawlers")

View File

@ -82,6 +82,9 @@ class DefaultSection(Section):
def report(self) -> bool:
return self.s.getboolean("report", fallback=True)
def show_not_deleted(self) -> bool:
return self.s.getboolean("show_not_deleted", fallback=True)
def share_cookies(self) -> bool:
return self.s.getboolean("share_cookies", fallback=True)

View File

@ -1,6 +1,10 @@
from enum import Enum
from typing import Optional
import bs4
from PFERD.utils import soupify
_link_template_plain = "{{link}}"
_link_template_fancy = """
<!DOCTYPE html>
@ -94,6 +98,71 @@ _link_template_internet_shortcut = """
URL={{link}}
""".strip()
_learning_module_template = """
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>{{name}}</title>
</head>
<style>
* {
box-sizing: border-box;
}
.center-flex {
display: flex;
align-items: center;
justify-content: center;
}
.nav {
display: flex;
justify-content: space-between;
}
</style>
<body class="center-flex">
{{body}}
</body>
</html>
"""
def learning_module_template(body: bs4.Tag, name: str, prev: Optional[str], next: Optional[str]) -> str:
# Seems to be comments, ignore those.
for elem in body.select(".il-copg-mob-fullscreen-modal"):
elem.decompose()
nav_template = """
<div class="nav">
{{left}}
{{right}}
</div>
"""
if prev and body.select_one(".ilc_page_lnav_LeftNavigation"):
text = body.select_one(".ilc_page_lnav_LeftNavigation").getText().strip()
left = f'<a href="{prev}">{text}</a>'
else:
left = "<span></span>"
if next and body.select_one(".ilc_page_rnav_RightNavigation"):
text = body.select_one(".ilc_page_rnav_RightNavigation").getText().strip()
right = f'<a href="{next}">{text}</a>'
else:
right = "<span></span>"
if top_nav := body.select_one(".ilc_page_tnav_TopNavigation"):
top_nav.replace_with(
soupify(nav_template.replace("{{left}}", left).replace("{{right}}", right).encode())
)
if bot_nav := body.select_one(".ilc_page_bnav_BottomNavigation"):
bot_nav.replace_with(soupify(nav_template.replace(
"{{left}}", left).replace("{{right}}", right).encode())
)
body = body.prettify()
return _learning_module_template.replace("{{body}}", body).replace("{{name}}", name)
class Links(Enum):
IGNORE = "ignore"
@ -102,24 +171,24 @@ class Links(Enum):
INTERNET_SHORTCUT = "internet-shortcut"
def template(self) -> Optional[str]:
if self == self.FANCY:
if self == Links.FANCY:
return _link_template_fancy
elif self == self.PLAINTEXT:
elif self == Links.PLAINTEXT:
return _link_template_plain
elif self == self.INTERNET_SHORTCUT:
elif self == Links.INTERNET_SHORTCUT:
return _link_template_internet_shortcut
elif self == self.IGNORE:
elif self == Links.IGNORE:
return None
raise ValueError("Missing switch case")
def extension(self) -> Optional[str]:
if self == self.FANCY:
if self == Links.FANCY:
return ".html"
elif self == self.PLAINTEXT:
elif self == Links.PLAINTEXT:
return ".txt"
elif self == self.INTERNET_SHORTCUT:
elif self == Links.INTERNET_SHORTCUT:
return ".url"
elif self == self.IGNORE:
elif self == Links.IGNORE:
return None
raise ValueError("Missing switch case")

View File

@ -82,7 +82,7 @@ def clean(soup: BeautifulSoup) -> BeautifulSoup:
dummy.decompose()
if len(children) > 1:
continue
if type(children[0]) == Comment:
if isinstance(type(children[0]), Comment):
dummy.decompose()
for hrule_imposter in soup.find_all(class_="ilc_section_Separator"):

View File

@ -3,7 +3,7 @@ import re
from dataclasses import dataclass
from datetime import date, datetime, timedelta
from enum import Enum
from typing import Dict, List, Optional, Union
from typing import Dict, List, Optional, Union, cast
from urllib.parse import urljoin, urlparse
from bs4 import BeautifulSoup, Tag
@ -22,13 +22,18 @@ class IliasElementType(Enum):
FOLDER = "folder"
FORUM = "forum"
LINK = "link"
INFO_TAB = "info_tab"
LEARNING_MODULE = "learning_module"
BOOKING = "booking"
MEETING = "meeting"
SURVEY = "survey"
VIDEO = "video"
VIDEO_PLAYER = "video_player"
VIDEO_FOLDER = "video_folder"
VIDEO_FOLDER_MAYBE_PAGINATED = "video_folder_maybe_paginated"
SCORM_LEARNING_MODULE = "scorm_learning_module"
MEDIACAST_VIDEO_FOLDER = "mediacast_video_folder"
MEDIACAST_VIDEO = "mediacast_video"
OPENCAST_VIDEO = "opencast_video"
OPENCAST_VIDEO_PLAYER = "opencast_video_player"
OPENCAST_VIDEO_FOLDER = "opencast_video_folder"
OPENCAST_VIDEO_FOLDER_MAYBE_PAGINATED = "opencast_video_folder_maybe_paginated"
@dataclass
@ -44,7 +49,8 @@ class IliasPageElement:
r"eid=(?P<id>[0-9a-z\-]+)",
r"file_(?P<id>\d+)",
r"ref_id=(?P<id>\d+)",
r"target=[a-z]+_(?P<id>\d+)"
r"target=[a-z]+_(?P<id>\d+)",
r"mm_(?P<id>\d+)"
]
for regex in regexes:
@ -71,6 +77,14 @@ class IliasForumThread:
mtime: Optional[datetime]
@dataclass
class IliasLearningModulePage:
title: str
content: Tag
next_url: Optional[str]
previous_url: Optional[str]
class IliasPage:
def __init__(self, soup: BeautifulSoup, _page_url: str, source_element: Optional[IliasPageElement]):
@ -79,6 +93,16 @@ class IliasPage:
self._page_type = source_element.type if source_element else None
self._source_name = source_element.name if source_element else ""
@staticmethod
def is_root_page(soup: BeautifulSoup) -> bool:
permalink = soup.find(id="current_perma_link")
if permalink is None:
return False
value = permalink.attrs.get("value")
if value is None:
return False
return "goto.php?target=root_" in value
def get_child_elements(self) -> List[IliasPageElement]:
"""
Return all child page elements you can find here.
@ -86,9 +110,9 @@ class IliasPage:
if self._is_video_player():
log.explain("Page is a video player, extracting URL")
return self._player_to_video()
if self._is_video_listing():
log.explain("Page is a video listing, searching for elements")
return self._find_video_entries()
if self._is_opencast_video_listing():
log.explain("Page is an opencast video listing, searching for elements")
return self._find_opencast_video_entries()
if self._is_exercise_file():
log.explain("Page is an exercise, searching for elements")
return self._find_exercise_entries()
@ -98,9 +122,25 @@ class IliasPage:
if self._is_content_page():
log.explain("Page is a content page, searching for elements")
return self._find_copa_entries()
if self._is_info_tab():
log.explain("Page is info tab, searching for elements")
return self._find_info_tab_entries()
log.explain("Page is a normal folder, searching for elements")
return self._find_normal_entries()
def get_info_tab(self) -> Optional[IliasPageElement]:
tab: Optional[Tag] = self._soup.find(
name="a",
attrs={"href": lambda x: x and "cmdClass=ilinfoscreengui" in x}
)
if tab is not None:
return IliasPageElement(
IliasElementType.INFO_TAB,
self._abs_url_from_link(tab),
"infos"
)
return None
def get_description(self) -> Optional[BeautifulSoup]:
def is_interesting_class(name: str) -> bool:
return name in ["ilCOPageSection", "ilc_Paragraph", "ilc_va_ihcap_VAccordIHeadCap"]
@ -126,6 +166,34 @@ class IliasPage:
return BeautifulSoup(raw_html, "html.parser")
def get_learning_module_data(self) -> Optional[IliasLearningModulePage]:
if not self._is_learning_module_page():
return None
content = self._soup.select_one("#ilLMPageContent")
title = self._soup.select_one(".ilc_page_title_PageTitle").getText().strip()
return IliasLearningModulePage(
title=title,
content=content,
next_url=self._find_learning_module_next(),
previous_url=self._find_learning_module_prev()
)
def _find_learning_module_next(self) -> Optional[str]:
for link in self._soup.select("a.ilc_page_rnavlink_RightNavigationLink"):
url = self._abs_url_from_link(link)
if "baseClass=ilLMPresentationGUI" not in url:
continue
return url
return None
def _find_learning_module_prev(self) -> Optional[str]:
for link in self._soup.select("a.ilc_page_lnavlink_LeftNavigationLink"):
url = self._abs_url_from_link(link)
if "baseClass=ilLMPresentationGUI" not in url:
continue
return url
return None
def get_download_forum_data(self) -> Optional[IliasDownloadForumData]:
form = self._soup.find("form", attrs={"action": lambda x: x and "fallbackCmd=showThreads" in x})
if not form:
@ -152,12 +220,18 @@ class IliasPage:
if self._is_ilias_opencast_embedding():
log.explain("Unwrapping opencast embedding")
return self.get_child_elements()[0]
if self._page_type == IliasElementType.VIDEO_FOLDER_MAYBE_PAGINATED:
if self._page_type == IliasElementType.OPENCAST_VIDEO_FOLDER_MAYBE_PAGINATED:
log.explain("Unwrapping video pagination")
return self._find_video_entries_paginated()[0]
return self._find_opencast_video_entries_paginated()[0]
if self._contains_collapsed_future_meetings():
log.explain("Requesting *all* future meetings")
return self._uncollapse_future_meetings_url()
if not self._is_content_tab_selected():
if self._page_type != IliasElementType.INFO_TAB:
log.explain("Selecting content tab")
return self._select_content_page_url()
else:
log.explain("Crawling info tab, skipping content select")
return None
def _is_forum_page(self) -> bool:
@ -170,7 +244,7 @@ class IliasPage:
def _is_video_player(self) -> bool:
return "paella_config_file" in str(self._soup)
def _is_video_listing(self) -> bool:
def _is_opencast_video_listing(self) -> bool:
if self._is_ilias_opencast_embedding():
return True
@ -210,16 +284,50 @@ class IliasPage:
return False
return "target=copa_" in link.get("value")
def _is_learning_module_page(self) -> bool:
link = self._soup.find(id="current_perma_link")
if not link:
return False
return "target=pg_" in link.get("value")
def _contains_collapsed_future_meetings(self) -> bool:
return self._uncollapse_future_meetings_url() is not None
def _uncollapse_future_meetings_url(self) -> Optional[IliasPageElement]:
element = self._soup.find("a", attrs={"href": lambda x: x and "crs_next_sess=1" in x})
element = self._soup.find(
"a",
attrs={"href": lambda x: x and ("crs_next_sess=1" in x or "crs_prev_sess=1" in x)}
)
if not element:
return None
link = self._abs_url_from_link(element)
return IliasPageElement(IliasElementType.FOLDER, link, "show all meetings")
def _is_content_tab_selected(self) -> bool:
return self._select_content_page_url() is None
def _is_info_tab(self) -> bool:
might_be_info = self._soup.find("form", attrs={"name": lambda x: x == "formInfoScreen"}) is not None
return self._page_type == IliasElementType.INFO_TAB and might_be_info
def _select_content_page_url(self) -> Optional[IliasPageElement]:
tab = self._soup.find(
id="tab_view_content",
attrs={"class": lambda x: x is not None and "active" not in x}
)
# Already selected (or not found)
if not tab:
return None
link = tab.find("a")
if link:
link = self._abs_url_from_link(link)
return IliasPageElement(IliasElementType.FOLDER, link, "select content page")
_unexpected_html_warning()
log.warn_contd(f"Could not find content tab URL on {self._page_url!r}.")
log.warn_contd("PFERD might not find content on the course's main page.")
return None
def _player_to_video(self) -> List[IliasPageElement]:
# Fetch the actual video page. This is a small wrapper page initializing a javscript
# player. Sadly we can not execute that JS. The actual video stream url is nowhere
@ -243,14 +351,14 @@ class IliasPage:
# and just fetch the lone video url!
if len(streams) == 1:
video_url = streams[0]["sources"]["mp4"][0]["src"]
return [IliasPageElement(IliasElementType.VIDEO, video_url, self._source_name)]
return [IliasPageElement(IliasElementType.OPENCAST_VIDEO, video_url, self._source_name)]
log.explain(f"Found multiple videos for stream at {self._source_name}")
items = []
for stream in sorted(streams, key=lambda stream: stream["content"]):
full_name = f"{self._source_name.replace('.mp4', '')} ({stream['content']}).mp4"
video_url = stream["sources"]["mp4"][0]["src"]
items.append(IliasPageElement(IliasElementType.VIDEO, video_url, full_name))
items.append(IliasPageElement(IliasElementType.OPENCAST_VIDEO, video_url, full_name))
return items
@ -298,7 +406,8 @@ class IliasPage:
for link in links:
url = self._abs_url_from_link(link)
name = _sanitize_path_name(link.getText().strip().replace("\t", ""))
name = re.sub(r"\([\d,.]+ [MK]B\)", "", link.getText()).strip().replace("\t", "")
name = _sanitize_path_name(name)
if "file_id" not in url:
_unexpected_html_warning()
@ -309,7 +418,24 @@ class IliasPage:
return items
def _find_video_entries(self) -> List[IliasPageElement]:
def _find_info_tab_entries(self) -> List[IliasPageElement]:
items = []
links: List[Tag] = self._soup.select("a.il_ContainerItemCommand")
for link in links:
if "cmdClass=ilobjcoursegui" not in link["href"]:
continue
if "cmd=sendfile" not in link["href"]:
continue
items.append(IliasPageElement(
IliasElementType.FILE,
self._abs_url_from_link(link),
_sanitize_path_name(link.getText())
))
return items
def _find_opencast_video_entries(self) -> List[IliasPageElement]:
# ILIAS has three stages for video pages
# 1. The initial dummy page without any videos. This page contains the link to the listing
# 2. The video listing which might be paginated
@ -329,27 +455,27 @@ class IliasPage:
query_params = {"limit": "800", "cmd": "asyncGetTableGUI", "cmdMode": "asynch"}
url = url_set_query_params(url, query_params)
log.explain("Found ILIAS video frame page, fetching actual content next")
return [IliasPageElement(IliasElementType.VIDEO_FOLDER_MAYBE_PAGINATED, url, "")]
return [IliasPageElement(IliasElementType.OPENCAST_VIDEO_FOLDER_MAYBE_PAGINATED, url, "")]
is_paginated = self._soup.find(id=re.compile(r"tab_page_sel.+")) is not None
if is_paginated and not self._page_type == IliasElementType.VIDEO_FOLDER:
if is_paginated and not self._page_type == IliasElementType.OPENCAST_VIDEO_FOLDER:
# We are in stage 2 - try to break pagination
return self._find_video_entries_paginated()
return self._find_opencast_video_entries_paginated()
return self._find_video_entries_no_paging()
return self._find_opencast_video_entries_no_paging()
def _find_video_entries_paginated(self) -> List[IliasPageElement]:
def _find_opencast_video_entries_paginated(self) -> List[IliasPageElement]:
table_element: Tag = self._soup.find(name="table", id=re.compile(r"tbl_xoct_.+"))
if table_element is None:
log.warn("Couldn't increase elements per page (table not found). I might miss elements.")
return self._find_video_entries_no_paging()
return self._find_opencast_video_entries_no_paging()
id_match = re.match(r"tbl_xoct_(.+)", table_element.attrs["id"])
if id_match is None:
log.warn("Couldn't increase elements per page (table id not found). I might miss elements.")
return self._find_video_entries_no_paging()
return self._find_opencast_video_entries_no_paging()
table_id = id_match.group(1)
@ -358,9 +484,9 @@ class IliasPage:
url = url_set_query_params(self._page_url, query_params)
log.explain("Disabled pagination, retrying folder as a new entry")
return [IliasPageElement(IliasElementType.VIDEO_FOLDER, url, "")]
return [IliasPageElement(IliasElementType.OPENCAST_VIDEO_FOLDER, url, "")]
def _find_video_entries_no_paging(self) -> List[IliasPageElement]:
def _find_opencast_video_entries_no_paging(self) -> List[IliasPageElement]:
"""
Crawls the "second stage" video page. This page contains the actual video urls.
"""
@ -372,11 +498,11 @@ class IliasPage:
results: List[IliasPageElement] = []
for link in video_links:
results.append(self._listed_video_to_element(link))
results.append(self._listed_opencast_video_to_element(link))
return results
def _listed_video_to_element(self, link: Tag) -> IliasPageElement:
def _listed_opencast_video_to_element(self, link: Tag) -> IliasPageElement:
# The link is part of a table with multiple columns, describing metadata.
# 6th or 7th child (1 indexed) is the modification time string. Try to find it
# by parsing backwards from the end and finding something that looks like a date
@ -403,7 +529,9 @@ class IliasPage:
video_url = self._abs_url_from_link(link)
log.explain(f"Found video {video_name!r} at {video_url}")
return IliasPageElement(IliasElementType.VIDEO_PLAYER, video_url, video_name, modification_time)
return IliasPageElement(
IliasElementType.OPENCAST_VIDEO_PLAYER, video_url, video_name, modification_time
)
def _find_exercise_entries(self) -> List[IliasPageElement]:
if self._soup.find(id="tab_submission"):
@ -546,9 +674,48 @@ class IliasPage:
result.append(IliasPageElement(element_type, abs_url, element_name, description=description))
result += self._find_cards()
result += self._find_mediacast_videos()
return result
def _find_mediacast_videos(self) -> List[IliasPageElement]:
videos: List[IliasPageElement] = []
for elem in cast(List[Tag], self._soup.select(".ilPlayerPreviewOverlayOuter")):
element_name = _sanitize_path_name(
elem.select_one(".ilPlayerPreviewDescription").getText().strip()
)
if not element_name.endswith(".mp4"):
# just to make sure it has some kinda-alrightish ending
element_name = element_name + ".mp4"
video_element = elem.find(name="video")
if not video_element:
_unexpected_html_warning()
log.warn_contd(f"No <video> element found for mediacast video '{element_name}'")
continue
videos.append(IliasPageElement(
type=IliasElementType.MEDIACAST_VIDEO,
url=self._abs_url_from_relative(video_element.get("src")),
name=element_name,
mtime=self._find_mediacast_video_mtime(elem.findParent(name="td"))
))
return videos
def _find_mediacast_video_mtime(self, enclosing_td: Tag) -> Optional[datetime]:
description_td: Tag = enclosing_td.findPreviousSibling("td")
if not description_td:
return None
meta_tag: Tag = description_td.find_all("p")[-1]
if not meta_tag:
return None
updated_str = meta_tag.getText().strip().replace("\n", " ")
updated_str = re.sub(".+?: ", "", updated_str)
return demangle_date(updated_str)
def _is_in_expanded_meeting(self, tag: Tag) -> bool:
"""
Returns whether a file is part of an expanded meeting.
@ -685,7 +852,11 @@ class IliasPage:
"div",
attrs={"class": lambda x: x and "caption" in x},
)
description = caption_parent.find_next_sibling("div").getText().strip()
caption_container = caption_parent.find_next_sibling("div")
if caption_container:
description = caption_container.getText().strip()
else:
description = None
if not type:
_unexpected_html_warning()
@ -715,8 +886,8 @@ class IliasPage:
icon: Tag = card_root.select_one(".il-card-repository-head .icon")
if "opencast" in icon["class"]:
return IliasElementType.VIDEO_FOLDER_MAYBE_PAGINATED
if "opencast" in icon["class"] or "xoct" in icon["class"]:
return IliasElementType.OPENCAST_VIDEO_FOLDER_MAYBE_PAGINATED
if "exc" in icon["class"]:
return IliasElementType.EXERCISE
if "webr" in icon["class"]:
@ -735,6 +906,10 @@ class IliasPage:
return IliasElementType.FOLDER
if "svy" in icon["class"]:
return IliasElementType.SURVEY
if "file" in icon["class"]:
return IliasElementType.FILE
if "mcst" in icon["class"]:
return IliasElementType.MEDIACAST_VIDEO_FOLDER
_unexpected_html_warning()
log.warn_contd(f"Could not extract type from {icon} for card title {card_title}")
@ -773,6 +948,15 @@ class IliasPage:
if "cmdClass=ilobjtestgui" in parsed_url.query:
return IliasElementType.TEST
if "baseClass=ilLMPresentationGUI" in parsed_url.query:
return IliasElementType.LEARNING_MODULE
if "baseClass=ilMediaCastHandlerGUI" in parsed_url.query:
return IliasElementType.MEDIACAST_VIDEO_FOLDER
if "baseClass=ilSAHSPresentationGUI" in parsed_url.query:
return IliasElementType.SCORM_LEARNING_MODULE
# Booking and Meeting can not be detected based on the link. They do have a ref_id though, so
# try to guess it from the image.
@ -814,7 +998,11 @@ class IliasPage:
if img_tag is None:
img_tag = found_parent.select_one("img.icon")
if img_tag is None and found_parent.find("a", attrs={"href": lambda x: x and "crs_next_sess=" in x}):
is_session_expansion_button = found_parent.find(
"a",
attrs={"href": lambda x: x and ("crs_next_sess=" in x or "crs_prev_sess=" in x)}
)
if img_tag is None and is_session_expansion_button:
log.explain("Found session expansion button, skipping it as it has no content")
return None
@ -824,7 +1012,7 @@ class IliasPage:
return None
if "opencast" in str(img_tag["alt"]).lower():
return IliasElementType.VIDEO_FOLDER_MAYBE_PAGINATED
return IliasElementType.OPENCAST_VIDEO_FOLDER_MAYBE_PAGINATED
if str(img_tag["src"]).endswith("icon_exc.svg"):
return IliasElementType.EXERCISE
@ -844,6 +1032,12 @@ class IliasPage:
if str(img_tag["src"]).endswith("icon_tst.svg"):
return IliasElementType.TEST
if str(img_tag["src"]).endswith("icon_mcst.svg"):
return IliasElementType.MEDIACAST_VIDEO_FOLDER
if str(img_tag["src"]).endswith("icon_sahs.svg"):
return IliasElementType.SCORM_LEARNING_MODULE
return IliasElementType.FOLDER
@staticmethod

View File

@ -1,8 +1,11 @@
import asyncio
import base64
import os
import re
from collections.abc import Awaitable, Coroutine
from pathlib import PurePath
from typing import Any, Callable, Dict, List, Optional, Set, Union, cast
from typing import Any, Callable, Dict, List, Literal, Optional, Set, Union, cast
from urllib.parse import urljoin
import aiohttp
import yarl
@ -16,10 +19,10 @@ from ...output_dir import FileSink, Redownload
from ...utils import fmt_path, soupify, url_set_query_param
from ..crawler import AWrapped, CrawlError, CrawlToken, CrawlWarning, DownloadToken, anoncritical
from ..http_crawler import HttpCrawler, HttpCrawlerSection
from .file_templates import Links
from .file_templates import Links, learning_module_template
from .ilias_html_cleaner import clean, insert_base_markup
from .kit_ilias_html import (IliasElementType, IliasForumThread, IliasPage, IliasPageElement,
_sanitize_path_name, parse_ilias_forum_export)
from .kit_ilias_html import (IliasElementType, IliasForumThread, IliasLearningModulePage, IliasPage,
IliasPageElement, _sanitize_path_name, parse_ilias_forum_export)
TargetType = Union[str, int]
@ -82,16 +85,20 @@ _DIRECTORY_PAGES: Set[IliasElementType] = set([
IliasElementType.EXERCISE,
IliasElementType.EXERCISE_FILES,
IliasElementType.FOLDER,
IliasElementType.INFO_TAB,
IliasElementType.MEETING,
IliasElementType.VIDEO_FOLDER,
IliasElementType.VIDEO_FOLDER_MAYBE_PAGINATED,
IliasElementType.MEDIACAST_VIDEO_FOLDER,
IliasElementType.OPENCAST_VIDEO_FOLDER,
IliasElementType.OPENCAST_VIDEO_FOLDER_MAYBE_PAGINATED,
])
_VIDEO_ELEMENTS: Set[IliasElementType] = set([
IliasElementType.VIDEO,
IliasElementType.VIDEO_PLAYER,
IliasElementType.VIDEO_FOLDER,
IliasElementType.VIDEO_FOLDER_MAYBE_PAGINATED,
IliasElementType.MEDIACAST_VIDEO_FOLDER,
IliasElementType.MEDIACAST_VIDEO,
IliasElementType.OPENCAST_VIDEO,
IliasElementType.OPENCAST_VIDEO_PLAYER,
IliasElementType.OPENCAST_VIDEO_FOLDER,
IliasElementType.OPENCAST_VIDEO_FOLDER_MAYBE_PAGINATED,
])
@ -239,7 +246,7 @@ instance's greatest bottleneck.
# Duplicated code, but the root page is special - we want to avoid fetching it twice!
while next_stage_url:
soup = await self._get_page(next_stage_url)
soup = await self._get_page(next_stage_url, root_page_allowed=True)
if current_parent is None and expected_id is not None:
perma_link_element: Tag = soup.find(id="current_perma_link")
@ -256,6 +263,8 @@ instance's greatest bottleneck.
next_stage_url = None
elements.extend(page.get_child_elements())
if info_tab := page.get_info_tab():
elements.append(info_tab)
if description_string := page.get_description():
description.append(description_string)
@ -394,14 +403,26 @@ instance's greatest bottleneck.
"[bright_black](surveys contain no relevant data)"
)
return None
elif element.type == IliasElementType.SCORM_LEARNING_MODULE:
log.status(
"[bold bright_black]",
"Ignored",
fmt_path(element_path),
"[bright_black](scorm learning modules are not supported)"
)
return None
elif element.type == IliasElementType.LEARNING_MODULE:
return await self._handle_learning_module(element, element_path)
elif element.type == IliasElementType.LINK:
return await self._handle_link(element, element_path)
elif element.type == IliasElementType.BOOKING:
return await self._handle_booking(element, element_path)
elif element.type == IliasElementType.VIDEO:
elif element.type == IliasElementType.OPENCAST_VIDEO:
return await self._handle_file(element, element_path)
elif element.type == IliasElementType.OPENCAST_VIDEO_PLAYER:
return await self._handle_opencast_video(element, element_path)
elif element.type == IliasElementType.MEDIACAST_VIDEO:
return await self._handle_file(element, element_path)
elif element.type == IliasElementType.VIDEO_PLAYER:
return await self._handle_video(element, element_path)
elif element.type in _DIRECTORY_PAGES:
return await self._handle_ilias_page(element.url, element, element_path)
else:
@ -518,7 +539,7 @@ instance's greatest bottleneck.
raise CrawlError("resolve_link_target failed even after authenticating")
async def _handle_video(
async def _handle_opencast_video(
self,
element: IliasPageElement,
element_path: PurePath,
@ -539,18 +560,18 @@ instance's greatest bottleneck.
# If we do not want to crawl it (user filter) or we have every file
# from the cached mapping already, we can ignore this and bail
if not maybe_dl or self._all_videos_locally_present(element_path):
if not maybe_dl or self._all_opencast_videos_locally_present(element_path):
# Mark all existing cideos as known so they do not get deleted
# during dleanup. We "downloaded" them, just without actually making
# a network request as we assumed they did not change.
for video in self._previous_contained_videos(element_path):
for video in self._previous_contained_opencast_videos(element_path):
await self.download(video)
return None
return self._download_video(element_path, element, maybe_dl)
return self._download_opencast_video(element_path, element, maybe_dl)
def _previous_contained_videos(self, video_path: PurePath) -> List[PurePath]:
def _previous_contained_opencast_videos(self, video_path: PurePath) -> List[PurePath]:
if not self.prev_report:
return []
custom_value = self.prev_report.get_custom_value(str(video_path))
@ -560,12 +581,12 @@ instance's greatest bottleneck.
folder = video_path.parent
return [PurePath(folder, name) for name in names]
def _all_videos_locally_present(self, video_path: PurePath) -> bool:
if contained_videos := self._previous_contained_videos(video_path):
def _all_opencast_videos_locally_present(self, video_path: PurePath) -> bool:
if contained_videos := self._previous_contained_opencast_videos(video_path):
log.explain_topic(f"Checking local cache for video {video_path.name}")
all_found_locally = True
for video in contained_videos:
transformed_path = self._to_local_video_path(video)
transformed_path = self._to_local_opencast_video_path(video)
if transformed_path:
exists_locally = self._output_dir.resolve(transformed_path).exists()
all_found_locally = all_found_locally and exists_locally
@ -575,14 +596,14 @@ instance's greatest bottleneck.
log.explain("Missing at least one video, continuing with requests!")
return False
def _to_local_video_path(self, path: PurePath) -> Optional[PurePath]:
def _to_local_opencast_video_path(self, path: PurePath) -> Optional[PurePath]:
if transformed := self._transformer.transform(path):
return self._deduplicator.fixup_path(transformed)
return None
@anoncritical
@_iorepeat(3, "downloading video")
async def _download_video(
async def _download_opencast_video(
self,
original_path: PurePath,
element: IliasPageElement,
@ -599,7 +620,7 @@ instance's greatest bottleneck.
log.explain(f"Using single video mode for {element.name}")
stream_element = stream_elements[0]
transformed_path = self._to_local_video_path(original_path)
transformed_path = self._to_local_opencast_video_path(original_path)
if not transformed_path:
raise CrawlError(f"Download returned a path but transform did not for {original_path}")
@ -695,7 +716,7 @@ instance's greatest bottleneck.
log.explain(f"URL: {next_stage_url}")
soup = await self._get_page(next_stage_url)
page = IliasPage(soup, next_stage_url, None)
page = IliasPage(soup, next_stage_url, element)
if next := page.get_next_stage_element():
next_stage_url = next.url
@ -739,12 +760,142 @@ instance's greatest bottleneck.
sink.file.write(content.encode("utf-8"))
sink.done()
async def _get_page(self, url: str) -> BeautifulSoup:
async def _handle_learning_module(
self,
element: IliasPageElement,
element_path: PurePath,
) -> Optional[Coroutine[Any, Any, None]]:
maybe_cl = await self.crawl(element_path)
if not maybe_cl:
return None
return self._crawl_learning_module(element, maybe_cl)
@_iorepeat(3, "crawling learning module")
@anoncritical
async def _crawl_learning_module(self, element: IliasPageElement, cl: CrawlToken) -> None:
elements: List[IliasLearningModulePage] = []
async with cl:
log.explain_topic(f"Parsing initial HTML page for {fmt_path(cl.path)}")
log.explain(f"URL: {element.url}")
soup = await self._get_page(element.url)
page = IliasPage(soup, element.url, element)
if next := page.get_learning_module_data():
elements.extend(await self._crawl_learning_module_direction(
cl.path, next.previous_url, "left", element
))
elements.append(next)
elements.extend(await self._crawl_learning_module_direction(
cl.path, next.next_url, "right", element
))
# Reflect their natural ordering in the file names
for index, lm_element in enumerate(elements):
lm_element.title = f"{index:02}_{lm_element.title}"
tasks: List[Awaitable[None]] = []
for index, elem in enumerate(elements):
prev_url = elements[index - 1].title if index > 0 else None
next_url = elements[index + 1].title if index < len(elements) - 1 else None
tasks.append(asyncio.create_task(
self._download_learning_module_page(cl.path, elem, prev_url, next_url)
))
# And execute them
await self.gather(tasks)
async def _crawl_learning_module_direction(
self,
path: PurePath,
start_url: Optional[str],
dir: Union[Literal["left"], Literal["right"]],
parent_element: IliasPageElement
) -> List[IliasLearningModulePage]:
elements: List[IliasLearningModulePage] = []
if not start_url:
return elements
next_element_url: Optional[str] = start_url
counter = 0
while next_element_url:
log.explain_topic(f"Parsing HTML page for {fmt_path(path)} ({dir}-{counter})")
log.explain(f"URL: {next_element_url}")
soup = await self._get_page(next_element_url)
page = IliasPage(soup, next_element_url, parent_element)
if next := page.get_learning_module_data():
elements.append(next)
if dir == "left":
next_element_url = next.previous_url
else:
next_element_url = next.next_url
counter += 1
return elements
@anoncritical
@_iorepeat(3, "saving learning module page")
async def _download_learning_module_page(
self,
parent_path: PurePath,
element: IliasLearningModulePage,
prev: Optional[str],
next: Optional[str]
) -> None:
path = parent_path / (_sanitize_path_name(element.title) + ".html")
maybe_dl = await self.download(path)
if not maybe_dl:
return
my_path = self._transformer.transform(maybe_dl.path)
if not my_path:
return
if prev:
prev_p = self._transformer.transform(parent_path / (_sanitize_path_name(prev) + ".html"))
if prev_p:
prev = os.path.relpath(prev_p, my_path.parent)
else:
prev = None
if next:
next_p = self._transformer.transform(parent_path / (_sanitize_path_name(next) + ".html"))
if next_p:
next = os.path.relpath(next_p, my_path.parent)
else:
next = None
async with maybe_dl as (bar, sink):
content = element.content
content = await self.internalize_images(content)
sink.file.write(learning_module_template(content, maybe_dl.path.name, prev, next).encode("utf-8"))
sink.done()
async def internalize_images(self, tag: Tag) -> Tag:
"""
Tries to fetch ILIAS images and embed them as base64 data.
"""
log.explain_topic("Internalizing images")
for elem in tag.find_all(recursive=True):
if not isinstance(elem, Tag):
continue
if elem.name == "img":
if src := elem.attrs.get("src", None):
url = urljoin(_ILIAS_URL, src)
if not url.startswith(_ILIAS_URL):
continue
log.explain(f"Internalizing {url!r}")
img = await self._get_authenticated(url)
elem.attrs["src"] = "data:;base64," + base64.b64encode(img).decode()
if elem.name == "iframe" and elem.attrs.get("src", "").startswith("//"):
# For unknown reasons the protocol seems to be stripped.
elem.attrs["src"] = "https:" + elem.attrs["src"]
return tag
async def _get_page(self, url: str, root_page_allowed: bool = False) -> BeautifulSoup:
auth_id = await self._current_auth_id()
async with self.session.get(url) as request:
soup = soupify(await request.read())
if self._is_logged_in(soup):
return soup
return self._verify_page(soup, url, root_page_allowed)
# We weren't authenticated, so try to do that
await self.authenticate(auth_id)
@ -753,14 +904,26 @@ instance's greatest bottleneck.
async with self.session.get(url) as request:
soup = soupify(await request.read())
if self._is_logged_in(soup):
return soup
raise CrawlError("get_page failed even after authenticating")
return self._verify_page(soup, url, root_page_allowed)
raise CrawlError(f"get_page failed even after authenticating on {url!r}")
def _verify_page(self, soup: BeautifulSoup, url: str, root_page_allowed: bool) -> BeautifulSoup:
if IliasPage.is_root_page(soup) and not root_page_allowed:
raise CrawlError(
"Unexpectedly encountered ILIAS root page. "
"This usually happens because the ILIAS instance is broken. "
"If so, wait a day or two and try again. "
"It could also happen because a crawled element links to the ILIAS root page. "
"If so, use a transform with a ! as target to ignore the particular element. "
f"The redirect came from {url}"
)
return soup
async def _post_authenticated(
self,
url: str,
data: dict[str, Union[str, List[str]]]
) -> BeautifulSoup:
) -> bytes:
auth_id = await self._current_auth_id()
form_data = aiohttp.FormData()
@ -780,6 +943,22 @@ instance's greatest bottleneck.
return await request.read()
raise CrawlError("post_authenticated failed even after authenticating")
async def _get_authenticated(self, url: str) -> bytes:
auth_id = await self._current_auth_id()
async with self.session.get(url, allow_redirects=False) as request:
if request.status == 200:
return await request.read()
# We weren't authenticated, so try to do that
await self.authenticate(auth_id)
# Retry once after authenticating. If this fails, we will die.
async with self.session.get(url, allow_redirects=False) as request:
if request.status == 200:
return await request.read()
raise CrawlError("get_authenticated failed even after authenticating")
# We repeat this as the login method in shibboleth doesn't handle I/O errors.
# Shibboleth is quite reliable as well, the repeat is likely not critical here.
@ _iorepeat(3, "Login", failure_is_error=True)

View File

@ -14,7 +14,7 @@ def name_variants(path: PurePath) -> Iterator[PurePath]:
class Deduplicator:
FORBIDDEN_CHARS = '<>:"/\\|?*'
FORBIDDEN_CHARS = '<>:"/\\|?*' + "".join([chr(i) for i in range(0, 32)])
FORBIDDEN_NAMES = {
"CON", "PRN", "AUX", "NUL",
"COM1", "COM2", "COM3", "COM4", "COM5", "COM6", "COM7", "COM8", "COM9",

View File

@ -59,6 +59,7 @@ class Log:
# Whether different parts of the output are enabled or disabled
self.output_explain = False
self.output_status = True
self.output_not_deleted = True
self.output_report = True
def _update_live(self) -> None:
@ -207,6 +208,17 @@ directly or as a GitHub issue: https://github.com/Garmelon/PFERD/issues/new
action = escape(f"{action:<{self.STATUS_WIDTH}}")
self.print(f"{style}{action}[/] {escape(text)} {suffix}")
def not_deleted(self, style: str, action: str, text: str, suffix: str = "") -> None:
"""
Print a message for a local only file that wasn't
deleted while crawling. Allows markup in the "style"
argument which will be applied to the "action" string.
"""
if self.output_status and self.output_not_deleted:
action = escape(f"{action:<{self.STATUS_WIDTH}}")
self.print(f"{style}{action}[/] {escape(text)} {suffix}")
def report(self, text: str) -> None:
"""
Print a report after crawling. Allows markup.
@ -215,6 +227,14 @@ directly or as a GitHub issue: https://github.com/Garmelon/PFERD/issues/new
if self.output_report:
self.print(text)
def report_not_deleted(self, text: str) -> None:
"""
Print a report for a local only file that wasn't deleted after crawling. Allows markup.
"""
if self.output_report and self.output_not_deleted:
self.print(text)
@contextmanager
def _bar(
self,

View File

@ -44,6 +44,7 @@ class OnConflict(Enum):
LOCAL_FIRST = "local-first"
REMOTE_FIRST = "remote-first"
NO_DELETE = "no-delete"
NO_DELETE_PROMPT_OVERWRITE = "no-delete-prompt-overwrite"
@staticmethod
def from_string(string: str) -> "OnConflict":
@ -51,7 +52,7 @@ class OnConflict(Enum):
return OnConflict(string)
except ValueError:
raise ValueError("must be one of 'prompt', 'local-first',"
" 'remote-first', 'no-delete'")
" 'remote-first', 'no-delete', 'no-delete-prompt-overwrite'")
@dataclass
@ -264,7 +265,7 @@ class OutputDirectory:
on_conflict: OnConflict,
path: PurePath,
) -> bool:
if on_conflict == OnConflict.PROMPT:
if on_conflict in {OnConflict.PROMPT, OnConflict.NO_DELETE_PROMPT_OVERWRITE}:
async with log.exclusive_output():
prompt = f"Replace {fmt_path(path)} with remote file?"
return await prompt_yes_no(prompt, default=False)
@ -283,7 +284,7 @@ class OutputDirectory:
on_conflict: OnConflict,
path: PurePath,
) -> bool:
if on_conflict == OnConflict.PROMPT:
if on_conflict in {OnConflict.PROMPT, OnConflict.NO_DELETE_PROMPT_OVERWRITE}:
async with log.exclusive_output():
prompt = f"Recursively delete {fmt_path(path)} and replace with remote file?"
return await prompt_yes_no(prompt, default=False)
@ -303,7 +304,7 @@ class OutputDirectory:
path: PurePath,
parent: PurePath,
) -> bool:
if on_conflict == OnConflict.PROMPT:
if on_conflict in {OnConflict.PROMPT, OnConflict.NO_DELETE_PROMPT_OVERWRITE}:
async with log.exclusive_output():
prompt = f"Delete {fmt_path(parent)} so remote file {fmt_path(path)} can be downloaded?"
return await prompt_yes_no(prompt, default=False)
@ -330,7 +331,7 @@ class OutputDirectory:
return False
elif on_conflict == OnConflict.REMOTE_FIRST:
return True
elif on_conflict == OnConflict.NO_DELETE:
elif on_conflict in {OnConflict.NO_DELETE, OnConflict.NO_DELETE_PROMPT_OVERWRITE}:
return False
# This should never be reached
@ -495,7 +496,7 @@ class OutputDirectory:
except OSError:
pass
else:
log.status("[bold bright_magenta]", "Not deleted", fmt_path(pure))
log.not_deleted("[bold bright_magenta]", "Not deleted", fmt_path(pure))
self._report.not_delete_file(pure)
def load_prev_report(self) -> None:

View File

@ -180,7 +180,7 @@ class Pferd:
log.report(f" [bold bright_magenta]Deleted[/] {fmt_path(path)}")
for path in sorted(crawler.report.not_deleted_files):
something_changed = True
log.report(f" [bold bright_magenta]Not deleted[/] {fmt_path(path)}")
log.report_not_deleted(f" [bold bright_magenta]Not deleted[/] {fmt_path(path)}")
for warning in crawler.report.encountered_warnings:
something_changed = True

View File

@ -1,2 +1,2 @@
NAME = "PFERD"
VERSION = "3.4.3"
VERSION = "3.5.0"

27
flake.lock Normal file
View File

@ -0,0 +1,27 @@
{
"nodes": {
"nixpkgs": {
"locked": {
"lastModified": 1694499547,
"narHash": "sha256-R7xMz1Iia6JthWRHDn36s/E248WB1/je62ovC/dUVKI=",
"owner": "NixOS",
"repo": "nixpkgs",
"rev": "e5f018cf150e29aac26c61dac0790ea023c46b24",
"type": "github"
},
"original": {
"owner": "NixOS",
"ref": "nixos-23.05",
"repo": "nixpkgs",
"type": "github"
}
},
"root": {
"inputs": {
"nixpkgs": "nixpkgs"
}
}
},
"root": "root",
"version": 7
}

41
flake.nix Normal file
View File

@ -0,0 +1,41 @@
{
description = "Tool for downloading course-related files from ILIAS";
inputs = {
nixpkgs.url = "github:NixOS/nixpkgs/nixos-23.05";
};
outputs = { self, nixpkgs }:
let
# Helper function to generate an attrset '{ x86_64-linux = f "x86_64-linux"; ... }'.
forAllSystems = nixpkgs.lib.genAttrs nixpkgs.lib.systems.flakeExposed;
in
{
packages = forAllSystems (system:
let pkgs = import nixpkgs { inherit system; };
in
rec {
default = pkgs.python3Packages.buildPythonApplication rec {
pname = "pferd";
# Performing black magic
# Don't worry, I sacrificed enough goats for the next few years
version = (pkgs.lib.importTOML ./PFERD/version.py).VERSION;
format = "pyproject";
src = ./.;
nativeBuildInputs = with pkgs.python3Packages; [
setuptools
];
propagatedBuildInputs = with pkgs.python3Packages; [
aiohttp
beautifulsoup4
rich
keyring
certifi
];
};
});
};
}