mirror of
https://github.com/Garmelon/PFERD.git
synced 2023-12-21 10:23:01 +01:00
Sync files from ILIAS
This commit is contained in:
parent
529c4a7dda
commit
34da5d4d19
@ -1,12 +1,10 @@
|
|||||||
from .ffm import *
|
from .ffm import *
|
||||||
from .ilias_authenticators import *
|
from .ilias import *
|
||||||
from .organizer import *
|
|
||||||
from .utils import *
|
from .utils import *
|
||||||
|
|
||||||
__all__ = (
|
__all__ = (
|
||||||
ffm.__all__ +
|
ffm.__all__ +
|
||||||
ilias_authenticators.__all__ +
|
ilias.__all__ +
|
||||||
organizer.__all__ +
|
|
||||||
utils.__all__ +
|
utils.__all__ +
|
||||||
[]
|
[]
|
||||||
)
|
)
|
||||||
|
103
PFERD/ilias.py
Normal file
103
PFERD/ilias.py
Normal file
@ -0,0 +1,103 @@
|
|||||||
|
# ILIAS
|
||||||
|
|
||||||
|
import aiohttp
|
||||||
|
import asyncio
|
||||||
|
import bs4
|
||||||
|
import logging
|
||||||
|
import pathlib
|
||||||
|
import re
|
||||||
|
|
||||||
|
from .organizer import Organizer
|
||||||
|
from .ilias_authenticators import ShibbolethAuthenticator
|
||||||
|
from . import utils
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
"ILIAS",
|
||||||
|
]
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
class ILIAS:
    """Crawl an ILIAS folder tree and download the files found in it.

    Pages and files are fetched through a ``ShibbolethAuthenticator``;
    downloaded files are handed over to an ``Organizer``.
    """

    # Captures the download target (e.g. "file_123_download") of a file link.
    FILE_RE = re.compile(r"goto\.php\?target=(file_\d+_download)")
    # Captures the numeric ref_id of a folder link.
    DIR_RE = re.compile(r"ilias\.php\?ref_id=(\d+)")

    def __init__(self, base_path, cookie_file):
        """Create a synchronizer rooted at *base_path*.

        base_path   -- path object; must support ``base_path / cookie_file``
        cookie_file -- cookie file location relative to *base_path*, passed
                       on to the authenticator
        """
        self.base_path = base_path

        self._auth = ShibbolethAuthenticator(base_path / cookie_file)

    async def synchronize(self, ref_id, to_dir, transform=lambda x: x, filter=lambda x: True):
        """Synchronize the ILIAS folder *ref_id* into ``base_path / to_dir``.

        ref_id    -- id of the folder to start at (crawled as "fold_<ref_id>")
        to_dir    -- target directory, relative to ``base_path``
        transform -- called with each file's path relative to the crawl
                     root; returns the path to store the file under, or
                     None to skip the file
        filter    -- called with each sub-directory's path relative to the
                     crawl root; the directory is only searched if it
                     returns a true value
        """
        # Use the module logger like every other method of this class, so
        # that per-module log configuration also applies to this message.
        logger.info(f"Synchronizing {ref_id} to {to_dir} using the ILIAS synchronizer.")

        sync_path = pathlib.Path(self.base_path, to_dir)
        orga = Organizer(self.base_path, sync_path)

        orga.clean_temp_dir()

        files = await self._crawl(pathlib.PurePath(), f"fold_{ref_id}", filter)
        await self._download(orga, files, transform)

        orga.clean_sync_dir()
        orga.clean_temp_dir()

    async def close(self):
        """Close the underlying authenticator."""
        await self._auth.close()

    async def _crawl(self, dir_path, dir_id, filter_):
        """Recursively collect ``(path, file_id)`` tuples below *dir_id*.

        dir_path -- path of the crawled directory relative to the crawl root
        dir_id   -- id handed to the authenticator to fetch the page
        filter_  -- predicate deciding which sub-directories are searched;
                    it is not applied to files
        """
        soup = await self._auth.get_webpage(dir_id)

        found_files = []

        for name, file_id in self._find_files(soup):
            path = dir_path / name
            found_files.append((path, file_id))
            logger.debug(f"Found file {path}")

        for name, sub_dir_id in self._find_dirs(soup):
            path = dir_path / name
            logger.debug(f"Found dir {path}")
            if filter_(path):
                logger.info(f"Searching {path}")
                found_files.extend(await self._crawl(path, sub_dir_id, filter_))
            else:
                logger.info(f"Not searching {path}")

        return found_files

    async def _download(self, orga, files, transform):
        """Download every file whose transformed path is not None.

        Each file is first downloaded to a temp file obtained from *orga*
        and then registered with ``orga.add_file`` under the transformed
        path. Files are processed one after another.
        """
        for path, file_id in files:
            to_path = transform(path)
            if to_path is None:
                continue

            temp_path = orga.temp_file()
            await self._auth.download_file(file_id, temp_path)
            orga.add_file(temp_path, to_path)

    def _find_files(self, soup):
        """Return ``(file_name, file_id)`` for every file link in *soup*.

        The file name is "<link title>.<file type>". If the file type
        cannot be determined from the page, the bare link title is used
        instead of aborting the whole crawl.
        """
        files = []

        found = soup.find_all("a", {"class": "il_ContainerItemTitle", "href": self.FILE_RE})
        for element in found:
            file_stem = self._link_title(element)
            file_id = self.FILE_RE.search(element.get("href")).group(1)

            file_type = self._file_type(element)
            file_name = f"{file_stem}.{file_type}" if file_type else file_stem

            files.append((file_name, file_id))

        return files

    def _find_dirs(self, soup):
        """Return ``(dir_name, dir_id)`` for every folder link in *soup*.

        ``dir_id`` has the form "fold_<ref_id>".
        """
        dirs = []

        found = soup.find_all("a", {"class": "il_ContainerItemTitle", "href": self.DIR_RE})
        for element in found:
            dir_name = self._link_title(element)
            ref_id = self.DIR_RE.search(element.get("href")).group(1)
            dirs.append((dir_name, f"fold_{ref_id}"))

        return dirs

    @staticmethod
    def _link_title(element):
        """Return the visible text of a link element as a plain ``str``."""
        title = element.string
        if title is None:
            # Beautiful Soup's ``.string`` is None unless the tag contains
            # exactly one string; fall back to the concatenated text so the
            # name never becomes the literal "None".
            title = element.get_text()
        return str(title)

    @staticmethod
    def _file_type(element):
        """Return the file type shown next to a file link, or None.

        The type is read from the first <span> inside the
        "il_ItemProperties" <div> of the link's great-grandparent element.
        """
        container = element
        for _ in range(3):
            container = getattr(container, "parent", None)
            if container is None:
                return None

        properties = container.find("div", {"class": "il_ItemProperties"})
        if properties is None:
            return None

        span = properties.find("span")
        if span is None or span.string is None:
            return None

        # An empty type is treated like a missing one.
        return span.string.strip() or None
|
@ -86,7 +86,7 @@ class ShibbolethAuthenticator:
|
|||||||
return saml_response is not None and relay_state is not None
|
return saml_response is not None and relay_state is not None
|
||||||
|
|
||||||
def _save_cookies(self):
|
def _save_cookies(self):
|
||||||
logger.info(f"Saving cookies to {self._cookie_path!r}")
|
logger.info(f"Saving cookies to {self._cookie_path}")
|
||||||
if self._cookie_path is not None:
|
if self._cookie_path is not None:
|
||||||
self._session.cookie_jar.save(self._cookie_path)
|
self._session.cookie_jar.save(self._cookie_path)
|
||||||
|
|
||||||
@ -172,7 +172,7 @@ class ShibbolethAuthenticator:
|
|||||||
|
|
||||||
while True:
|
while True:
|
||||||
async with self._lock.read():
|
async with self._lock.read():
|
||||||
logger.debug(f"Getting {url} {params}")
|
logger.debug(f"Getting {self.ILIAS_GOTO} {params}")
|
||||||
_, text = await self._get(self.ILIAS_GOTO, params=params)
|
_, text = await self._get(self.ILIAS_GOTO, params=params)
|
||||||
soup = bs4.BeautifulSoup(text, "html.parser")
|
soup = bs4.BeautifulSoup(text, "html.parser")
|
||||||
|
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
import os
|
import os
|
||||||
|
import pathlib
|
||||||
|
|
||||||
__all__ = [
|
__all__ = [
|
||||||
"get_base_dir",
|
"get_base_dir",
|
||||||
@ -9,7 +10,7 @@ __all__ = [
|
|||||||
]
|
]
|
||||||
|
|
||||||
def get_base_dir(script_file):
|
def get_base_dir(script_file):
|
||||||
return os.path.dirname(os.path.abspath(script_file))
|
return pathlib.Path(os.path.dirname(os.path.abspath(script_file)))
|
||||||
|
|
||||||
async def stream_to_path(resp, to_path, chunk_size=1024**2):
|
async def stream_to_path(resp, to_path, chunk_size=1024**2):
|
||||||
with open(to_path, 'wb') as fd:
|
with open(to_path, 'wb') as fd:
|
||||||
|
@ -3,7 +3,7 @@ import asyncio
|
|||||||
import logging
|
import logging
|
||||||
import pathlib
|
import pathlib
|
||||||
|
|
||||||
logging.basicConfig(level=logging.INFO, format=PFERD.LOG_FORMAT)
|
logging.basicConfig(level=logging.DEBUG, format=PFERD.LOG_FORMAT)
|
||||||
|
|
||||||
base_dir = PFERD.get_base_dir(__file__)
|
base_dir = PFERD.get_base_dir(__file__)
|
||||||
|
|
||||||
@ -19,11 +19,21 @@ def ana1(old_path):
|
|||||||
|
|
||||||
return old_path
|
return old_path
|
||||||
|
|
||||||
|
def la1_filter(path):
    """Directory filter for the LA1 synchronization.

    Returns False for paths matching the pattern "Tutorien/*" and True
    for every other path.
    """
    return not path.match("Tutorien/*")
|
||||||
|
|
||||||
async def main():
|
async def main():
|
||||||
ffm = PFERD.FfM(base_dir)
|
#ffm = PFERD.FfM(base_dir)
|
||||||
await ffm.synchronize("iana2/lehre/hm1info2018w/de", "HM1", transform=hm1)
|
#await ffm.synchronize("iana2/lehre/hm1info2018w", "HM1", transform=hm1)
|
||||||
await ffm.synchronize("iana1/lehre/ana12018w/de", "Ana1", transform=ana1)
|
#await ffm.synchronize("iana1/lehre/ana12018w", "Ana1", transform=ana1)
|
||||||
await ffm.close()
|
#await ffm.close()
|
||||||
|
|
||||||
|
ilias = PFERD.ILIAS(base_dir, "cookie_jar")
|
||||||
|
await ilias.synchronize("874938", "LA1", filter=la1_filter)
|
||||||
|
await ilias.close()
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
asyncio.run(main())
|
asyncio.run(main())
|
||||||
|
Loading…
Reference in New Issue
Block a user