Compare commits

...

10 Commits

Author SHA1 Message Date
85b9f45085 Bump version to 3.0.1 2021-06-01 09:49:30 +00:00
f656e3ff34 Fix credential parsing 2021-06-01 09:18:17 +00:00
e1bda94329 Load credential file from correct path 2021-06-01 09:18:08 +00:00
f6b26f4ead Fix unexpected exception when credential file not found 2021-06-01 09:10:58 +00:00
722970a255 Store cookies in text-based format
Using the stdlib's http.cookies module, cookies are now stored as one
"Set-Cookie" header per line. Previously, the aiohttp.CookieJar's save() and
load() methods were used (which use pickling).
2021-05-31 20:18:20 +00:00
f40820c41f Warn if using concurrent tasks with kit-ilias-web 2021-05-31 20:18:20 +00:00
49ad1b6e46 Clean up authenticator code formatting 2021-05-31 18:45:06 +02:00
1ce32d2f18 Add CLI option for credential file auth to kit-ilias-web 2021-05-31 18:45:06 +02:00
9d5ec84b91 Add credential file authenticator 2021-05-31 18:33:34 +02:00
1fba96abcb Fix exercise date parsing for non-group submissions
ILIAS apparently changes the order of the fields as it sees fit, so we
now try to parse *every* column, starting from the right, as a date.
The first column that parses successfully is then used.
2021-05-31 18:15:12 +02:00
17 changed files with 154 additions and 40 deletions

View File

@ -22,6 +22,19 @@ ambiguous situations.
## Unreleased ## Unreleased
## 3.0.1 - 2021-06-01
### Added
- `credential-file` authenticator
- `--credential-file` option for `kit-ilias-web` command
- Warning if using concurrent tasks with `kit-ilias-web`
### Changed
- Cookies are now stored in a text-based format
### Fixed
- Date parsing now also works correctly in non-group exercises
## 3.0.0 - 2021-05-31 ## 3.0.0 - 2021-05-31
### Added ### Added

View File

@ -180,6 +180,19 @@ via the terminal.
- `username`: The username. (Optional) - `username`: The username. (Optional)
- `password`: The password. (Optional) - `password`: The password. (Optional)
### The `credential-file` authenticator
This authenticator reads a username and a password from a credential file. The
credential file has exactly two lines (trailing newline optional). The first
line starts with `username=` and contains the username, the second line starts
with `password=` and contains the password. The username and password may
contain any characters except a line break.
```
username=AzureDiamond
password=hunter2
```
### The `keyring` authenticator ### The `keyring` authenticator
This authenticator uses the system keyring to store passwords. The username can This authenticator uses the system keyring to store passwords. The username can

View File

@ -5,7 +5,8 @@ import os
import sys import sys
from pathlib import Path from pathlib import Path
from .cli import PARSER, load_default_section from .auth import AuthLoadError
from .cli import PARSER, ParserLoadError, load_default_section
from .config import Config, ConfigDumpError, ConfigLoadError, ConfigOptionError from .config import Config, ConfigDumpError, ConfigLoadError, ConfigOptionError
from .logging import log from .logging import log
from .pferd import Pferd, PferdLoadError from .pferd import Pferd, PferdLoadError
@ -36,6 +37,9 @@ def load_config(args: argparse.Namespace) -> Config:
log.error(str(e)) log.error(str(e))
log.error_contd(e.reason) log.error_contd(e.reason)
sys.exit(1) sys.exit(1)
except ParserLoadError as e:
log.error(str(e))
sys.exit(1)
def configure_logging_from_args(args: argparse.Namespace) -> None: def configure_logging_from_args(args: argparse.Namespace) -> None:
@ -131,7 +135,7 @@ def main() -> None:
loop.close() loop.close()
else: else:
asyncio.run(pferd.run(args.debug_transforms)) asyncio.run(pferd.run(args.debug_transforms))
except ConfigOptionError as e: except (ConfigOptionError, AuthLoadError) as e:
log.unlock() log.unlock()
log.error(str(e)) log.error(str(e))
sys.exit(1) sys.exit(1)

View File

@ -2,7 +2,8 @@ from configparser import SectionProxy
from typing import Callable, Dict from typing import Callable, Dict
from ..config import Config from ..config import Config
from .authenticator import Authenticator, AuthError, AuthSection # noqa: F401 from .authenticator import Authenticator, AuthError, AuthLoadError, AuthSection # noqa: F401
from .credential_file import CredentialFileAuthenticator, CredentialFileAuthSection
from .keyring import KeyringAuthenticator, KeyringAuthSection from .keyring import KeyringAuthenticator, KeyringAuthSection
from .simple import SimpleAuthenticator, SimpleAuthSection from .simple import SimpleAuthenticator, SimpleAuthSection
from .tfa import TfaAuthenticator from .tfa import TfaAuthenticator
@ -14,10 +15,12 @@ AuthConstructor = Callable[[
], Authenticator] ], Authenticator]
AUTHENTICATORS: Dict[str, AuthConstructor] = { AUTHENTICATORS: Dict[str, AuthConstructor] = {
"credential-file": lambda n, s, c:
CredentialFileAuthenticator(n, CredentialFileAuthSection(s), c),
"keyring": lambda n, s, c:
KeyringAuthenticator(n, KeyringAuthSection(s)),
"simple": lambda n, s, c: "simple": lambda n, s, c:
SimpleAuthenticator(n, SimpleAuthSection(s)), SimpleAuthenticator(n, SimpleAuthSection(s)),
"tfa": lambda n, s, c: "tfa": lambda n, s, c:
TfaAuthenticator(n), TfaAuthenticator(n),
"keyring": lambda n, s, c:
KeyringAuthenticator(n, KeyringAuthSection(s))
} }

View File

@ -17,10 +17,7 @@ class AuthSection(Section):
class Authenticator(ABC): class Authenticator(ABC):
def __init__( def __init__(self, name: str) -> None:
self,
name: str
) -> None:
""" """
Initialize an authenticator from its name and its section in the config Initialize an authenticator from its name and its section in the config
file. file.

View File

@ -0,0 +1,44 @@
from pathlib import Path
from typing import Tuple
from ..config import Config
from ..utils import fmt_real_path
from .authenticator import Authenticator, AuthLoadError, AuthSection
class CredentialFileAuthSection(AuthSection):
    """Config section of a `credential-file` authenticator."""

    def path(self) -> Path:
        """
        Return the section's `path` option as a Path.

        If the option is absent, this is reported through
        `self.missing_value("path")`.
        """

        raw_path = self.s.get("path")
        if raw_path is None:
            self.missing_value("path")
        return Path(raw_path)
class CredentialFileAuthenticator(Authenticator):
    """
    Authenticator that reads a username and a password from a credential file.

    The file must consist of exactly two lines: the first starts with
    "username=", the second with "password=". A trailing newline after the
    second line is optional.
    """

    _USERNAME_PREFIX = "username="
    _PASSWORD_PREFIX = "password="

    def __init__(self, name: str, section: CredentialFileAuthSection, config: Config) -> None:
        """
        Load and parse the credential file named by the section's `path`.

        The path is joined onto the working directory of the config's default
        section.

        Raises AuthLoadError if the file can't be opened, isn't valid UTF-8 or
        doesn't have the expected two-line format.
        """

        super().__init__(name)

        path = config.default_section.working_dir() / section.path()
        try:
            # Decode explicitly so parsing doesn't depend on the platform's
            # locale encoding.
            with open(path, encoding="utf-8") as f:
                lines = list(f)
        except UnicodeDecodeError as e:
            # Not an OSError, so without this branch a badly encoded file would
            # surface as an unhandled exception instead of an AuthLoadError.
            raise AuthLoadError(
                f"Credential file at {fmt_real_path(path)} is not encoded using UTF-8"
            ) from e
        except OSError as e:
            raise AuthLoadError(f"No credential file at {fmt_real_path(path)}") from e

        if len(lines) != 2:
            raise AuthLoadError("Credential file must be two lines long")
        [uline, pline] = lines
        # With exactly two lines, the first one always ends in a newline
        uline = uline[:-1]
        # The second line only ends in a newline if the file has a trailing one
        if pline.endswith("\n"):
            pline = pline[:-1]

        if not uline.startswith(self._USERNAME_PREFIX):
            raise AuthLoadError("First line must start with 'username='")
        if not pline.startswith(self._PASSWORD_PREFIX):
            raise AuthLoadError("Second line must start with 'password='")

        # Everything after the prefix is taken verbatim, including whitespace
        self._username = uline[len(self._USERNAME_PREFIX):]
        self._password = pline[len(self._PASSWORD_PREFIX):]

    async def credentials(self) -> Tuple[str, str]:
        """Return the (username, password) pair read from the credential file."""

        return self._username, self._password

View File

@ -18,11 +18,7 @@ class KeyringAuthSection(AuthSection):
class KeyringAuthenticator(Authenticator): class KeyringAuthenticator(Authenticator):
def __init__( def __init__(self, name: str, section: KeyringAuthSection) -> None:
self,
name: str,
section: KeyringAuthSection,
) -> None:
super().__init__(name) super().__init__(name)
self._username = section.username() self._username = section.username()

View File

@ -14,11 +14,7 @@ class SimpleAuthSection(AuthSection):
class SimpleAuthenticator(Authenticator): class SimpleAuthenticator(Authenticator):
def __init__( def __init__(self, name: str, section: SimpleAuthSection) -> None:
self,
name: str,
section: SimpleAuthSection,
) -> None:
super().__init__(name) super().__init__(name)
self._username = section.username() self._username = section.username()

View File

@ -6,10 +6,7 @@ from .authenticator import Authenticator, AuthError
class TfaAuthenticator(Authenticator): class TfaAuthenticator(Authenticator):
def __init__( def __init__(self, name: str) -> None:
self,
name: str,
) -> None:
super().__init__(name) super().__init__(name)
async def username(self) -> str: async def username(self) -> str:

View File

@ -1,11 +1,12 @@
# isort: skip_file # isort: skip_file
# The order of imports matters because each command module registers itself # The order of imports matters because each command module registers itself
# with the parser from ".parser". Because of this, isort is disabled for this # with the parser from ".parser" and the import order affects the order in
# which they appear in the help. Because of this, isort is disabled for this
# file. Also, since we're reexporting or just using the side effect of # file. Also, since we're reexporting or just using the side effect of
# importing itself, we get a few linting warnings, which we're disabling as # importing itself, we get a few linting warnings, which we're disabling as
# well. # well.
from . import command_local # noqa: F401 imported but unused from . import command_local # noqa: F401 imported but unused
from . import command_kit_ilias_web # noqa: F401 imported but unused from . import command_kit_ilias_web # noqa: F401 imported but unused
from .parser import PARSER, load_default_section # noqa: F401 imported but unused from .parser import PARSER, ParserLoadError, load_default_section # noqa: F401 imported but unused

View File

@ -4,7 +4,8 @@ from pathlib import Path
from ..crawl.ilias.file_templates import Links from ..crawl.ilias.file_templates import Links
from ..logging import log from ..logging import log
from .parser import CRAWLER_PARSER, SUBPARSERS, BooleanOptionalAction, load_crawler, show_value_error from .parser import (CRAWLER_PARSER, SUBPARSERS, BooleanOptionalAction, ParserLoadError, load_crawler,
show_value_error)
SUBPARSER = SUBPARSERS.add_parser( SUBPARSER = SUBPARSERS.add_parser(
"kit-ilias-web", "kit-ilias-web",
@ -38,6 +39,12 @@ GROUP.add_argument(
action=BooleanOptionalAction, action=BooleanOptionalAction,
help="use the system keyring to store and retrieve passwords" help="use the system keyring to store and retrieve passwords"
) )
GROUP.add_argument(
"--credential-file",
type=Path,
metavar="PATH",
help="read username and password from a credential file"
)
GROUP.add_argument( GROUP.add_argument(
"--links", "--links",
type=show_value_error(Links.from_string), type=show_value_error(Links.from_string),
@ -88,11 +95,19 @@ def load(
parser["auth:ilias"] = {} parser["auth:ilias"] = {}
auth_section = parser["auth:ilias"] auth_section = parser["auth:ilias"]
auth_section["type"] = "simple" if args.credential_file is not None:
if args.username is not None:
raise ParserLoadError("--credential-file and --username can't be used together")
if args.keyring:
raise ParserLoadError("--credential-file and --keyring can't be used together")
auth_section["type"] = "credential-file"
auth_section["path"] = str(args.credential_file)
elif args.keyring:
auth_section["type"] = "keyring"
else:
auth_section["type"] = "simple"
if args.username is not None: if args.username is not None:
auth_section["username"] = args.username auth_section["username"] = args.username
if args.keyring:
auth_section["type"] = "keyring"
SUBPARSER.set_defaults(command=load) SUBPARSER.set_defaults(command=load)

View File

@ -8,6 +8,10 @@ from ..output_dir import OnConflict, Redownload
from ..version import NAME, VERSION from ..version import NAME, VERSION
class ParserLoadError(Exception):
    """Raised when the command line arguments can't be turned into a config."""
# TODO Replace with argparse version when updating to 3.9? # TODO Replace with argparse version when updating to 3.9?
class BooleanOptionalAction(argparse.Action): class BooleanOptionalAction(argparse.Action):
def __init__( def __init__(

View File

@ -69,6 +69,7 @@ class Section:
class DefaultSection(Section): class DefaultSection(Section):
def working_dir(self) -> Path: def working_dir(self) -> Path:
# TODO Change to working dir instead of manually prepending it to paths
pathstr = self.s.get("working_dir", ".") pathstr = self.s.get("working_dir", ".")
return Path(pathstr).expanduser() return Path(pathstr).expanduser()

View File

@ -1,7 +1,8 @@
import asyncio import asyncio
import http.cookies
import ssl import ssl
from pathlib import Path, PurePath from pathlib import Path, PurePath
from typing import Dict, List, Optional from typing import Any, Dict, List, Optional
import aiohttp import aiohttp
import certifi import certifi
@ -105,6 +106,25 @@ class HttpCrawler(Crawler):
self._shared_cookie_jar_paths.append(self._cookie_jar_path) self._shared_cookie_jar_paths.append(self._cookie_jar_path)
def _load_cookies_from_file(self, path: Path) -> None:
    """
    Load cookies from a text file into this crawler's cookie jar.

    The file is expected to contain one "Set-Cookie:" header per line. Lines
    that don't start with that header name are skipped and mentioned in the
    explain log. Errors while opening or reading the file propagate to the
    caller.
    """

    jar: Any = http.cookies.SimpleCookie()
    # Decode explicitly so loading doesn't depend on the platform's locale
    with open(path, encoding="utf-8") as f:
        # Count from 1 so the reported number matches what an editor shows
        for i, line in enumerate(f, start=1):
            # Names of headers are case insensitive
            if line[:11].lower() == "set-cookie:":
                jar.load(line[11:])
            else:
                log.explain(f"Line {i} doesn't start with 'Set-Cookie:', ignoring it")
    self._cookie_jar.update_cookies(jar)
def _save_cookies_to_file(self, path: Path) -> None:
    """
    Write this crawler's cookies to a text file, one "Set-Cookie:" header per
    line.

    Errors while opening or writing the file propagate to the caller.
    """

    jar: Any = http.cookies.SimpleCookie()
    for morsel in self._cookie_jar:
        # NOTE: The jar is keyed by cookie name only, so if two cookies share
        # a name, the one seen later replaces the earlier one.
        jar[morsel.key] = morsel

    # Encode explicitly so the file doesn't depend on the platform's locale
    with open(path, "w", encoding="utf-8") as f:
        f.write(jar.output(sep="\n"))
        f.write("\n")  # A trailing newline is just common courtesy
def _load_cookies(self) -> None: def _load_cookies(self) -> None:
log.explain_topic("Loading cookies") log.explain_topic("Loading cookies")
@ -134,7 +154,7 @@ class HttpCrawler(Crawler):
log.explain(f"Loading cookies from {fmt_real_path(cookie_jar_path)}") log.explain(f"Loading cookies from {fmt_real_path(cookie_jar_path)}")
try: try:
self._cookie_jar.load(cookie_jar_path) self._load_cookies_from_file(cookie_jar_path)
except Exception as e: except Exception as e:
log.explain("Failed to load cookies") log.explain("Failed to load cookies")
log.explain(str(e)) log.explain(str(e))
@ -144,7 +164,7 @@ class HttpCrawler(Crawler):
try: try:
log.explain(f"Saving cookies to {fmt_real_path(self._cookie_jar_path)}") log.explain(f"Saving cookies to {fmt_real_path(self._cookie_jar_path)}")
self._cookie_jar.save(self._cookie_jar_path) self._save_cookies_to_file(self._cookie_jar_path)
except Exception as e: except Exception as e:
log.warn(f"Failed to save cookies to {fmt_real_path(self._cookie_jar_path)}") log.warn(f"Failed to save cookies to {fmt_real_path(self._cookie_jar_path)}")
log.warn(str(e)) log.warn(str(e))

View File

@ -230,12 +230,16 @@ class IliasPage:
parent_row: Tag = link.findParent("tr") parent_row: Tag = link.findParent("tr")
children: List[Tag] = parent_row.findChildren("td") children: List[Tag] = parent_row.findChildren("td")
# <checkbox> <name> <uploader> <date> <download>
# 0 1 2 3 4
name = _sanitize_path_name(children[1].getText().strip()) name = _sanitize_path_name(children[1].getText().strip())
date = demangle_date(children[3].getText().strip())
log.explain(f"Found exercise detail entry {name!r}") log.explain(f"Found exercise detail entry {name!r}")
for child in reversed(children):
date = demangle_date(child.getText().strip(), fail_silently=True)
if date is not None:
break
if date is None:
log.warn(f"Date parsing failed for exercise entry {name!r}")
results.append(IliasPageElement( results.append(IliasPageElement(
IliasElementType.FILE, IliasElementType.FILE,
self._abs_url_from_link(link), self._abs_url_from_link(link),
@ -522,7 +526,7 @@ german_months = ['Jan', 'Feb', 'Mär', 'Apr', 'Mai', 'Jun', 'Jul', 'Aug', 'Sep',
english_months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'] english_months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
def demangle_date(date_str: str) -> Optional[datetime]: def demangle_date(date_str: str, fail_silently: bool = False) -> Optional[datetime]:
""" """
Demangle a given date in one of the following formats: Demangle a given date in one of the following formats:
"Gestern, HH:MM" "Gestern, HH:MM"
@ -554,7 +558,8 @@ def demangle_date(date_str: str) -> Optional[datetime]:
return datetime(year, month, day, hour, minute) return datetime(year, month, day, hour, minute)
except Exception: except Exception:
log.warn(f"Date parsing failed for {date_str!r}") if not fail_silently:
log.warn(f"Date parsing failed for {date_str!r}")
return None return None

View File

@ -21,7 +21,6 @@ TargetType = Union[str, int]
class KitIliasWebCrawlerSection(HttpCrawlerSection): class KitIliasWebCrawlerSection(HttpCrawlerSection):
def target(self) -> TargetType: def target(self) -> TargetType:
target = self.s.get("target") target = self.s.get("target")
if not target: if not target:
@ -164,6 +163,12 @@ class KitIliasWebCrawler(HttpCrawler):
auth = section.auth(authenticators) auth = section.auth(authenticators)
super().__init__(name, section, config, shared_auth=auth) super().__init__(name, section, config, shared_auth=auth)
if section.tasks() > 1:
log.warn("""
Please avoid using too many parallel requests as these are the KIT ILIAS
instance's greatest bottleneck.
""".strip())
self._shibboleth_login = KitShibbolethLogin( self._shibboleth_login = KitShibbolethLogin(
auth, auth,
section.tfa_auth(authenticators), section.tfa_auth(authenticators),

View File

@ -1,2 +1,2 @@
NAME = "PFERD" NAME = "PFERD"
VERSION = "3.0.0" VERSION = "3.0.1"