Skip to content

Commit

Permalink
wip: add checks for dir and list option
Browse files Browse the repository at this point in the history
#4 --list-only flag will skip download
  • Loading branch information
borsna committed Sep 17, 2023
1 parent 408b28c commit 9001cab
Show file tree
Hide file tree
Showing 5 changed files with 29 additions and 18 deletions.
8 changes: 7 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

Simple utility to download datasets from data repositories.

The goal of this project is to explore machine readable metadata and learn more about writing python packages.

⚠️ __script is in early development and needs testing__ ⚠️

## Installation
Expand Down Expand Up @@ -32,6 +34,10 @@ or short form doi:
## Semi-supported repositories
* Figshare - https://su.figshare.com & https://figshare.scilifelab.se (more testing needed)

## Alternatives

* [datahugger](https://github.com/J535D165/datahugger/) - wider repository support

## Improve the script

Adding support for additional repositories requires test cases and investigation around how to get file metadata from the landing page.
Expand All @@ -41,6 +47,6 @@ Please help by testing and reporting [issues](https://github.com/borsna/daget/is
## TODO

- [ ] Add error handling
- [ ] Check empty destination directory
- [x] Check empty destination directory
- [ ] Improve documentation
- [x] Package script for pip
30 changes: 18 additions & 12 deletions daget/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,29 +15,35 @@ def main():

parser.add_argument('url', help="URL/DOI to the dataset")
parser.add_argument('destination', help="Full or relative path to destination directory")
parser.add_argument("--list-only", action="store_true", help="Skip download")

args = parser.parse_args()

desitnation = os.path.realpath(args.destination)

if not os.path.exists(desitnation):
os.makedirs(desitnation)

print("destination: ", desitnation)

# get doi/url and resolve to landing page
try:
url = get_redirect_url(args.url)
except ResolveError as err:
print(bcolors.FAIL, "error resolving ", args.url, bcolors.ENDC)
print(bcolors.FAIL, f'error resolving {args.url}', bcolors.ENDC)
exit(1)

print("landing page: ", url)
print(f'landing page: {url}')

# get desitnation directory and create directory
desitnation = os.path.realpath(args.destination)

if not os.path.exists(desitnation):
os.makedirs(desitnation)
else:
if len(os.listdir(desitnation)) != 0:
print(bcolors.FAIL, f'{desitnation} must be a empty directory or new directory path', bcolors.ENDC)
exit(1)

print(f'destination: {desitnation}')

files = get_file_list_from_repo(url)

total_size = 0

print(bcolors.BOLD, "size", "\t", "path", bcolors.ENDC)
for file in files:
total_size += file['size']
print(bcolors.OKBLUE, size_as_string(file['size']).strip(), bcolors.ENDC, file['name'])
Expand All @@ -46,8 +52,8 @@ def main():

if not os.path.exists(file_dir):
os.makedirs(file_dir)

download_file(file['url'], file_path)
if(not args.list_only):
download_file(file['url'], file_path)

print(bcolors.OKGREEN, bcolors.BOLD, size_as_string(total_size), bcolors.ENDC, "downloaded ")

Expand Down
1 change: 0 additions & 1 deletion daget/exceptions.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
class ResolveError(ValueError):
pass


class RepoError(Exception):
pass
4 changes: 2 additions & 2 deletions daget/utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import urllib, urllib.error
from exceptions import RepoError, ResolveError
from daget.exceptions import RepoError, ResolveError


def get_redirect_url(url):
Expand All @@ -14,7 +14,7 @@ def get_redirect_url(url):
r = urllib.request.urlopen(url)
return r.geturl()
except urllib.error.HTTPError:
raise ResolveError("url not found")
raise ResolveError(f"{url} not found")

def download_file(url, target):
opener = urllib.request.build_opener()
Expand Down
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "daget"
version = "0.3"
version = "0.4"
description = "Download dataset via DOI or landing page url"
authors = [
{ name = "Olof Olsson", email = "[email protected]" }
Expand All @@ -20,7 +20,7 @@ dependencies = ["requests"]
requires-python = ">=3.6"

[project.scripts]
datahugger = "daget.__main__:main"
daget = "daget.__main__:main"

[build-system]
requires = ["setuptools>=61.0"]
Expand Down

0 comments on commit 9001cab

Please sign in to comment.