mirror of
https://github.com/Garmelon/PFERD.git
synced 2023-12-21 10:23:01 +01:00
Download files to some local file
This commit is contained in:
parent
cf9d43fe84
commit
4e6912591c
@ -10,6 +10,7 @@ from .ReadWriteLock import ReadWriteLock
|
|||||||
|
|
||||||
__all__ = [
|
__all__ = [
|
||||||
"OutOfTriesException",
|
"OutOfTriesException",
|
||||||
|
"UnknownFileType",
|
||||||
"ShibbolethAuthenticator",
|
"ShibbolethAuthenticator",
|
||||||
]
|
]
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
@ -17,10 +18,14 @@ logger = logging.getLogger(__name__)
|
|||||||
class OutOfTriesException(Exception):
|
class OutOfTriesException(Exception):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
class UnknownFileType(Exception):
    """Signals a downloaded response whose content type is not recognised."""
|
||||||
|
|
||||||
class ShibbolethAuthenticator:
|
class ShibbolethAuthenticator:
|
||||||
|
|
||||||
RETRY_ATTEMPTS = 5
|
RETRY_ATTEMPTS = 5
|
||||||
RETRY_DELAY = 1 # seconds
|
RETRY_DELAY = 1 # seconds
|
||||||
|
CHUNK_SIZE = 1024**2
|
||||||
|
|
||||||
def __init__(self, cookie_path=None):
|
def __init__(self, cookie_path=None):
|
||||||
self._cookie_path = cookie_path
|
self._cookie_path = cookie_path
|
||||||
@ -55,7 +60,7 @@ class ShibbolethAuthenticator:
|
|||||||
logger.debug(f"Try {t+1} out of {self.RETRY_ATTEMPTS} failed, retrying in {self.RETRY_DELAY} s")
|
logger.debug(f"Try {t+1} out of {self.RETRY_ATTEMPTS} failed, retrying in {self.RETRY_DELAY} s")
|
||||||
await asyncio.sleep(self.RETRY_DELAY)
|
await asyncio.sleep(self.RETRY_DELAY)
|
||||||
|
|
||||||
logger.error("Could not retrieve url")
|
logger.error(f"Could not POST {url} params:{params} data:{data}.")
|
||||||
raise OutOfTriesException(f"Try {self.RETRY_ATTEMPTS} out of {self.RETRY_ATTEMPTS} failed.")
|
raise OutOfTriesException(f"Try {self.RETRY_ATTEMPTS} out of {self.RETRY_ATTEMPTS} failed.")
|
||||||
|
|
||||||
async def _get(self, url, params=None):
|
async def _get(self, url, params=None):
|
||||||
@ -68,7 +73,7 @@ class ShibbolethAuthenticator:
|
|||||||
logger.debug(f"Try {t+1} out of {self.RETRY_ATTEMPTS} failed, retrying in {self.RETRY_DELAY} s")
|
logger.debug(f"Try {t+1} out of {self.RETRY_ATTEMPTS} failed, retrying in {self.RETRY_DELAY} s")
|
||||||
await asyncio.sleep(self.RETRY_DELAY)
|
await asyncio.sleep(self.RETRY_DELAY)
|
||||||
|
|
||||||
logger.error("Could not retrieve url")
|
logger.error(f"Could not GET {url} params:{params}.")
|
||||||
raise OutOfTriesException(f"Try {self.RETRY_ATTEMPTS} out of {self.RETRY_ATTEMPTS} failed.")
|
raise OutOfTriesException(f"Try {self.RETRY_ATTEMPTS} out of {self.RETRY_ATTEMPTS} failed.")
|
||||||
|
|
||||||
def _login_successful(self, soup):
|
def _login_successful(self, soup):
|
||||||
@ -170,6 +175,45 @@ class ShibbolethAuthenticator:
|
|||||||
else:
|
else:
|
||||||
await self._ensure_authenticated()
|
await self._ensure_authenticated()
|
||||||
|
|
||||||
async def download_file(self, file_id):
|
async def _stream_to_path(self, resp, to_path):
    """Copy the body of ``resp`` into the file at ``to_path``.

    The target is opened in binary write mode, so an existing file is
    overwritten. The body is pulled through ``resp.content.read`` in
    pieces of at most ``self.CHUNK_SIZE`` bytes; an empty read marks the
    end of the stream.
    """
    with open(to_path, 'wb') as fd:
        # Priming read, then keep writing until the stream hands back nothing.
        piece = await resp.content.read(self.CHUNK_SIZE)
        while piece:
            fd.write(piece)
            piece = await resp.content.read(self.CHUNK_SIZE)
|
||||||
|
|
||||||
|
async def _download(self, url, params, to_path):
    """GET ``url`` and, if the answer is a PDF, store it at ``to_path``.

    Returns:
        True when a response of type ``application/pdf`` was streamed to
        ``to_path``; False when the response was ``text/html`` (the
        caller treats this as "probably not logged in").

    Raises:
        UnknownFileType: the response had any other content type.
        OutOfTriesException: every one of ``self.RETRY_ATTEMPTS`` attempts
            ended with the server disconnecting.
    """
    for t in range(self.RETRY_ATTEMPTS):
        try:
            async with self._session.get(url, params=params) as resp:
                kind = resp.content_type

                # An HTML page instead of a file: most likely a login page.
                if kind == "text/html":
                    return False

                # Anything that is neither HTML nor a PDF is unexpected.
                if kind != "application/pdf":
                    raise UnknownFileType(f"Unknown file of type {resp.content_type}.")

                # Only PDFs are accepted as the actual file for now.
                await self._stream_to_path(resp, to_path)
                return True

        except aiohttp.client_exceptions.ServerDisconnectedError:
            # Only a dropped connection is retried; other errors propagate.
            logger.debug(f"Try {t+1} out of {self.RETRY_ATTEMPTS} failed, retrying in {self.RETRY_DELAY} s")
            await asyncio.sleep(self.RETRY_DELAY)

    # All attempts were used up without getting a usable response.
    logger.error(f"Could not download {url} params:{params}.")
    raise OutOfTriesException(f"Try {self.RETRY_ATTEMPTS} out of {self.RETRY_ATTEMPTS} failed.")
|
||||||
|
|
||||||
|
async def download_file(self, file_id, to_path):
    """Download the file identified by ``file_id`` to ``to_path``.

    ``file_id`` is sent as the ``target`` query parameter of the ILIAS
    ``goto.php`` endpoint. Whenever ``_download`` reports failure, the
    session is re-authenticated and the download is tried again; the
    method only returns once a download has succeeded. Exceptions raised
    by ``_download`` or ``_ensure_authenticated`` are not caught here.
    """
    endpoint = "https://ilias.studium.kit.edu/goto.php"
    query = {"target": file_id}

    done = False
    while not done:
        async with self._lock.read():
            done = await self._download(endpoint, query, to_path)

        # NOTE(review): re-authentication is assumed to run after the read
        # lock has been released - confirm against the original indentation.
        if not done:
            await self._ensure_authenticated()
|
||||||
|
Loading…
Reference in New Issue
Block a user