Remove unnecessary files

This commit is contained in:
Joscha 2019-04-25 19:17:50 +00:00
parent dfddc93039
commit f1ba618378
3 changed files with 0 additions and 211 deletions

View File

@ -1,80 +0,0 @@
# From https://charemza.name/blog/posts/python/asyncio/read-write-lock/
# https://gist.github.com/michalc/ab9bd571cfab09216c0316f2302a76b0#file-asyncio_read_write_lock-py
import asyncio
import collections
import contextlib
class _ReadWaiter(asyncio.Future):
    """Future queued by a task that is waiting for shared (read) access."""


class _WriteWaiter(asyncio.Future):
    """Future queued by a task that is waiting for exclusive (write) access."""


class ReadWriteLock:
    """An asyncio lock allowing many concurrent readers or a single writer.

    Requests are kept in one FIFO queue. Readers at the head of the queue are
    admitted together while no writer holds the lock; a writer is admitted only
    once neither readers nor a writer hold it. Because readers are only taken
    from the head of the queue, a queued writer holds back readers that arrive
    after it.

    Use ``async with lock.read():`` or ``async with lock.write():``.
    """

    def __init__(self):
        # Pending _ReadWaiter/_WriteWaiter futures in arrival order.
        self._waiters = collections.deque()
        # Number of read grants currently outstanding.
        self._reads_held = 0
        # True while a write grant is outstanding.
        self._write_held = False

    def _pop_queued_waiters(self, waiter_type):
        """Yield (and dequeue) leading waiters of ``waiter_type``.

        Cancelled waiters at the head of the queue are discarded silently.
        Iteration stops at the first live waiter of a different type, or when
        the queue is empty.
        """
        while self._waiters:
            head = self._waiters[0]
            if head.cancelled():
                # Nobody is waiting on this one any more; drop it and go on.
                self._waiters.popleft()
                continue
            if not isinstance(head, waiter_type):
                break
            yield self._waiters.popleft()

    def _resolve_queued_waiters(self):
        """Grant the lock to as many queued waiters as the state allows."""
        if not self._write_held:
            for waiter in self._pop_queued_waiters(_ReadWaiter):
                self._reads_held += 1
                waiter.set_result(None)

        if not self._write_held and not self._reads_held:
            for waiter in self._pop_queued_waiters(_WriteWaiter):
                self._write_held = True
                waiter.set_result(None)
                # Exclusive access: only ever admit one writer at a time.
                break

    def _on_read_release(self):
        """Give back one read grant."""
        self._reads_held -= 1

    def _on_write_release(self):
        """Give back the write grant."""
        self._write_held = False

    @contextlib.asynccontextmanager
    async def _acquire(self, waiter_type, on_release):
        """Queue a waiter, wait for the grant, and release it on exit.

        Args:
            waiter_type: ``_ReadWaiter`` or ``_WriteWaiter``.
            on_release: callable undoing the bookkeeping done when the waiter
                was granted the lock.

        Raises:
            asyncio.CancelledError: if the waiting task is cancelled before it
                enters the guarded body.
        """
        waiter = waiter_type()
        self._waiters.append(waiter)
        self._resolve_queued_waiters()

        try:
            await waiter
        except asyncio.CancelledError:
            # The task may be cancelled after the waiter was already resolved
            # but before the task resumed. The grant has then been counted,
            # so it must be handed back or the lock stays held forever.
            if waiter.done() and not waiter.cancelled():
                on_release()
            self._resolve_queued_waiters()
            raise

        try:
            yield
        finally:
            on_release()
            self._resolve_queued_waiters()

    @contextlib.asynccontextmanager
    async def read(self):
        """Async context manager holding the lock in shared (read) mode."""
        async with self._acquire(_ReadWaiter, self._on_read_release):
            yield

    @contextlib.asynccontextmanager
    async def write(self):
        """Async context manager holding the lock in exclusive (write) mode."""
        async with self._acquire(_WriteWaiter, self._on_write_release):
            yield

87
plan.md
View File

@ -1,87 +0,0 @@
# Config
- python script as config
- imports PFERD as library
- operates relative to its own path
## Example folder structure
```
.../
    Vorlesungen/
        PFERD/          as symlink, locally or in python's import path
        PFERDconf.py    does things relative to its own location (hopefully)
        GBI/            folder to synchronize files into
            ...
        Prog/           folder to synchronize files into
            ...
```
## Example config
```python
import PFERD
def gbi_filter(ilias_path):
    ... # rename and move files, or filter them
    return local_path # or None if file should be ignored
kit = PFERD.KIT()
kit.synchronize("crs_855240", "GBI", filter=gbi_filter)
kit.synchronize("crs_851237", "Prog") # default filter preserves paths
```
# Structure
## Things that need to be done
- figure out where config file is located
- get and store shibboleth session cookie
- get and store ilias session cookie
- download specific ilias urls (mostly goto.php, probably)
- parse web page
- determine if logging in is necessary
- authenticate if necessary
- don't re-login if shibboleth session cookie is still valid
- find folders in current folder
- find files to download in current folder
- ignore LA1 test thingy
- crawl folders and create directory-like structure/file paths
- use filter function for paths
- download files to local file paths
- create folders as necessary
- remember downloaded files
- find files that were not previously downloaded
- remove un-downloaded files
- remove unnecessary folders (prompt user before deleting!)
- logging
- display crawl progress
- display structure of found files using neat box drawing characters
- display download progress
## How those things are usually done
Steps 3 to 5 are run for each `synchronize()` call.
1. launch script
- load cookie files
2. authenticate
(assuming enough time has passed for the session cookies to become invalid)
- prompt user for username
- prompt user for password (no echo)
- somehow obtain valid session cookies
3. crawl
- start at the id specified in synchronize() args
- search for folders and files to download
- build directory structure
4. download
- run each path through filter function
- if file was not filtered:
- download file and save result to filtered path
- use sync directory specified in synchronize() args
- remember the filtered path for later
5. cleanup
- search sync directory for files
- for each file not previously downloaded:
- prompt user if they want to delete the file (default: No)
- delete file if user answered Yes

44
test.py
View File

@ -1,44 +0,0 @@
import PFERD
import asyncio
import logging
import pathlib
import os
import sys
# Configure logging at DEBUG level using the log format exported by PFERD.
logging.basicConfig(level=logging.DEBUG, format=PFERD.LOG_FORMAT)
# Less verbose alternative (INFO level):
#logging.basicConfig(level=logging.INFO, format=PFERD.LOG_FORMAT)
async def test_download():
    """Manually exercise PFERD's download and organizer round trip.

    Downloads the item ``file_886544_download`` twice through a
    ``PFERD.ShibbolethAuthenticator`` into organizer temp files, registers
    them as ``test.pdf`` and ``bla/test2.pdf`` below the ``synctest``
    directory, and then cleans the sync and temp directories.

    The authenticator is closed even if a download or organizer step raises,
    so a failed run does not leave it open.
    """
    base_path = pathlib.Path(".")
    sync_path = pathlib.Path(base_path, "synctest")

    orga = PFERD.Organizer(base_path, sync_path)
    auth = PFERD.ShibbolethAuthenticator(cookie_path="cookie_jar")

    try:
        orga.clean_temp_dir()

        # Same remote item both times; only the local target path differs.
        for target in ("test.pdf", "bla/test2.pdf"):
            filename = orga.temp_file()
            await auth.download_file("file_886544_download", filename)
            orga.add_file(filename, pathlib.Path(target))

        orga.clean_sync_dir()
        orga.clean_temp_dir()
    finally:
        await auth.close()
def main():
    """Run the manual download test to completion on a fresh event loop."""
    # Pass debug=True to asyncio.run() to enable asyncio's debug mode.
    coroutine = test_download()
    asyncio.run(coroutine)
# Run the manual test only when executed as a script, not when imported.
if __name__ == "__main__":
    main()