From 9001cab926605dd5ebf5493a8a4f734cf1612437 Mon Sep 17 00:00:00 2001 From: Olof Olsson Date: Sun, 17 Sep 2023 16:26:16 +0200 Subject: [PATCH] wip: add checks for dir and list option #4 --list-only flag will skip download --- README.md | 8 +++++++- daget/__main__.py | 30 ++++++++++++++++++------------ daget/exceptions.py | 1 - daget/utils.py | 4 ++-- pyproject.toml | 4 ++-- 5 files changed, 29 insertions(+), 18 deletions(-) diff --git a/README.md b/README.md index 8676832..bda9d68 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,8 @@ Simple utility to download datasets from data respositories. +The goal of this project is to explore machine-readable metadata and learn more about writing Python packages. + ⚠️ __script is in early development and needs testing__ ⚠️ ## Installation @@ -32,6 +34,10 @@ or short form doi: ## Semi-suported respositories * Figshare - https://su.figshare.com & https://figshare.scilifelab.se (more testing needed) +## Alternatives + +* [datahugger](https://github.com/J535D165/datahugger/) - wider repository support + ## Improve the script Adding suport for additional repositories requires test cases and investigation arround how to get file metadata from the landing page. 
@@ -41,6 +47,6 @@ Please help by testing and reporting [issues](https://github.com/borsna/daget/is ## TODO - [ ] Add error handling -- [ ] Check empty destination directory +- [x] Check empty destination directory - [ ] Improve documentation - [x] Package script for pip diff --git a/daget/__main__.py b/daget/__main__.py index 70c01bc..41b1822 100755 --- a/daget/__main__.py +++ b/daget/__main__.py @@ -15,29 +15,35 @@ def main(): parser.add_argument('url', help="URL/DOI to the dataset") parser.add_argument('destination', help="Full or relative path to destination directory") + parser.add_argument("--list-only", action="store_true", help="Skip download") args = parser.parse_args() - desitnation = os.path.realpath(args.destination) - - if not os.path.exists(desitnation): - os.makedirs(desitnation) - - print("destination: ", desitnation) - + # get doi/url and resolve to landing page try: url = get_redirect_url(args.url) except ResolveError as err: - print(bcolors.FAIL, "error resolving ", args.url, bcolors.ENDC) + print(bcolors.FAIL, f'error resolving {args.url}', bcolors.ENDC) exit(1) - print("landing page: ", url) + print(f'landing page: {url}') + + # get desitnation directory and create directory + desitnation = os.path.realpath(args.destination) + + if not os.path.exists(desitnation): + os.makedirs(desitnation) + else: + if len(os.listdir(desitnation)) != 0: + print(bcolors.FAIL, f'{desitnation} must be a empty directory or new directory path', bcolors.ENDC) + exit(1) + + print(f'destination: {desitnation}') files = get_file_list_from_repo(url) total_size = 0 - print(bcolors.BOLD, "size", "\t", "path", bcolors.ENDC) for file in files: total_size += file['size'] print(bcolors.OKBLUE, size_as_string(file['size']).strip(), bcolors.ENDC, file['name']) @@ -46,8 +52,8 @@ def main(): if not os.path.exists(file_dir): os.makedirs(file_dir) - - download_file(file['url'], file_path) + if(not args.list_only): + download_file(file['url'], file_path) print(bcolors.OKGREEN, 
bcolors.BOLD, size_as_string(total_size), bcolors.ENDC, "downloaded ") diff --git a/daget/exceptions.py b/daget/exceptions.py index 8858511..87ec07d 100644 --- a/daget/exceptions.py +++ b/daget/exceptions.py @@ -1,6 +1,5 @@ class ResolveError(ValueError): pass - class RepoError(Exception): pass \ No newline at end of file diff --git a/daget/utils.py b/daget/utils.py index 9001be9..9f94551 100644 --- a/daget/utils.py +++ b/daget/utils.py @@ -1,5 +1,5 @@ import urllib, urllib.error -from exceptions import RepoError, ResolveError +from daget.exceptions import RepoError, ResolveError def get_redirect_url(url): @@ -14,7 +14,7 @@ def get_redirect_url(url): r = urllib.request.urlopen(url) return r.geturl() except urllib.error.HTTPError: - raise ResolveError("url not found") + raise ResolveError(f"{url} not found") def download_file(url, target): opener = urllib.request.build_opener() diff --git a/pyproject.toml b/pyproject.toml index cb3e28e..b5aea0d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "daget" -version = "0.3" +version = "0.4" description = "Download dataset via DOI or landing page url" authors = [ { name = "Olof Olsson", email = "borsna@gmail.com" } @@ -20,7 +20,7 @@ dependencies = ["requests"] requires-python = ">=3.6" [project.scripts] -datahugger = "daget.__main__:main" +daget = "daget.__main__:main" [build-system] requires = ["setuptools>=61.0"]