Move ilias stuff from aiohttp to requests

This commit is contained in:
Joscha
2019-04-25 18:52:48 +00:00
parent 0e536f476a
commit 9bae030186
4 changed files with 130 additions and 199 deletions

View File

@ -1,19 +1,15 @@
# ILIAS
import aiohttp
import asyncio
import bs4
import logging
import pathlib
import re
from .organizer import Organizer
from .ilias_authenticators import ShibbolethAuthenticator
from . import utils
import bs4
__all__ = [
"ILIAS",
]
from .ilias_authenticators import ShibbolethAuthenticator
from .organizer import Organizer
__all__ = ["ILIAS"]
logger = logging.getLogger(__name__)
class ILIAS:
@ -25,7 +21,7 @@ class ILIAS:
self._auth = ShibbolethAuthenticator(base_path / cookie_file)
async def synchronize(self, ref_id, to_dir, transform=lambda x: x, filter=lambda x: True):
def synchronize(self, ref_id, to_dir, transform=lambda x: x, filter=lambda x: True):
logging.info(f" Synchronizing ref_id {ref_id} to {to_dir} using the ILIAS synchronizer.")
sync_path = pathlib.Path(self.base_path, to_dir)
@ -33,17 +29,14 @@ class ILIAS:
orga.clean_temp_dir()
files = await self._crawl(pathlib.PurePath(), f"fold_{ref_id}", filter)
await self._download(orga, files, transform)
files = self._crawl(pathlib.PurePath(), f"fold_{ref_id}", filter)
self._download(orga, files, transform)
orga.clean_sync_dir()
orga.clean_temp_dir()
async def close(self):
await self._auth.close()
async def _crawl(self, dir_path, dir_id, filter_):
soup = await self._auth.get_webpage(dir_id)
def _crawl(self, dir_path, dir_id, filter_):
soup = self._auth.get_webpage(dir_id)
found_files = []
@ -59,19 +52,19 @@ class ILIAS:
logger.debug(f"Found dir {path}")
if filter_(path):
logger.info(f"Searching {path}")
files = await self._crawl(path, ref_id, filter_)
files = self._crawl(path, ref_id, filter_)
found_files.extend(files)
else:
logger.info(f"Not searching {path}")
return found_files
async def _download(self, orga, files, transform):
def _download(self, orga, files, transform):
for (path, file_id) in sorted(files):
to_path = transform(path)
if to_path is not None:
temp_path = orga.temp_file()
await self._auth.download_file(file_id, temp_path)
self._auth.download_file(file_id, temp_path)
orga.add_file(temp_path, to_path)
def _find_files(self, soup):