Skip to content

Commit

Permalink
fix: adjust file ordering based on previewer ability
Browse files Browse the repository at this point in the history
also some comments, note that the actual files.order array seems to be dropped by the
Invenio API, slight clean up in a few places
  • Loading branch information
phette23 committed Jun 24, 2024
1 parent b7bef56 commit 64415b9
Show file tree
Hide file tree
Showing 4 changed files with 32 additions and 22 deletions.
18 changes: 13 additions & 5 deletions migrate/import.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,12 +56,13 @@ def create_draft(record: dict) -> dict:
return draft_record


def add_files(dir: Path, attachments: list[dict], draft: dict):
def add_files(dir: Path, record: Record, draft: dict):
# add files to draft record
# three steps: initiate, upload, and commit
# ! Unable to set files order or default_preview like API docs suggest

# initiate all at once
keys = [{"key": attachment["filename"]} for attachment in attachments]
keys = [{"key": att["filename"]} for att in record.files]
init_response: requests.Response = requests.post(
draft["links"]["files"],
data=json.dumps(keys),
Expand All @@ -75,8 +76,8 @@ def add_files(dir: Path, attachments: list[dict], draft: dict):

# upload one by one
# TODO use httpx to do in parallel?
for attachment in attachments:
binary_headers: dict[str, str] = headers
for attachment in record.files:
binary_headers: dict[str, str] = headers.copy()
binary_headers["Content-Type"] = "application/octet-stream"
with open(dir / attachment["filename"], "rb") as f:
upload_response: requests.Response = requests.put(
Expand Down Expand Up @@ -126,17 +127,24 @@ def publish(draft: dict) -> dict:
)
@click.help_option("-h", "--help")
@click.argument("dir", type=click.Path(exists=True), required=True)
# TODO option to ignore errors which skips assert statements & response.raise_for_status()
# @click.option("--ignore-errors", "-i", help="Ignore errors and continue", is_flag=True)
@click.option("--verbose", "-v", "is_verbose", help="Print more output", is_flag=True)
def main(dir: str, is_verbose: bool):
global verbose
verbose = is_verbose

item = get_item(Path(dir) / "metadata" / "item.json")
record = Record(item)

click.echo(f"Importing {record.title} from {dir}...")
draft = create_draft(record.get())
add_files(Path(dir), record.files, draft)

if len(record.files):
add_files(Path(dir), record, draft)

published_record = publish(draft)

# TODO add to community
click.echo(f"Published: {published_record['links']['self_html']}")

Expand Down
4 changes: 3 additions & 1 deletion migrate/record.py
Original file line number Diff line number Diff line change
Expand Up @@ -464,9 +464,11 @@ def get(self) -> dict[str, Any]:
},
# ! blocked until we know what custom fields we'll have
"custom_fields": {},
# TODO add files, figure out best one to show first (prefer image formats?)
"files": {
"enabled": bool(len(self.files)),
# ! API drops these, whether we define before adding files or after
"order": [att["filename"] for att in self.files],
"default_preview": self.files[0]["filename"] if len(self.files) else "",
},
# "files": {
# "enabled": bool(len(self.files)),
Expand Down
12 changes: 6 additions & 6 deletions migrate/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@ def test_mklist(input, expect):
([], []),
([{"filename": "file.py"}], [{"filename": "file.py"}]),
(
[{"filename": "binary.gzip"}, {"filename": "film.mov"}],
[{"filename": "film.mov"}, {"filename": "binary.gzip"}],
[{"filename": "binary.gzip"}, {"filename": "img.gif"}],
[{"filename": "img.gif"}, {"filename": "binary.gzip"}],
),
(
[{"filename": "print.pdf"}, {"filename": "word.docx"}],
Expand Down Expand Up @@ -60,14 +60,14 @@ def test_mklist(input, expect):
],
[
{"filename": "img.tiff"},
{"filename": "img.webp"},
{"filename": "movie.mp4"},
{"filename": "doc.pdf"},
{"filename": "zip.zip"},
{"filename": "unknown"},
{"filename": "plain.txt"},
{"filename": "img.webp"},
{"filename": "movie.mp4"},
{"filename": "song.mp3"},
{"filename": "app.exe"},
{"filename": "zip.zip"},
{"filename": "unknown"},
],
),
],
Expand Down
20 changes: 10 additions & 10 deletions migrate/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,25 +59,25 @@ def to_edtf(s) -> str | None:


def visual_mime_type_sort(attachment) -> int:
# Sort EQUELLA attachment dicts by MIME type, prefer visual types
# Order: TIFF > Other images > Video > PDF > Other Text > Binary ("application") > Unknown
# Sort EQUELLA attachment dicts by MIME type, preferring types previewable in Invenio,
# which are (according to the readme): PDF, ZIP, CSV, MARKDOWN, XML, JSON, PNG, JPG, GIF
# https://github.com/inveniosoftware/invenio-previewer
# Order: TIFF > Non-HEIC/WEBP Images > PDF > Markdown, CSV, XML > JSON > ZIP > Everything else (not previewable)
# https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/MIME_types#types
mt: str | None = mimetypes.guess_type(attachment["filename"])[0]
type, subtype = mt.split("/") if mt else ("unknown", "unknown")
match type, subtype:
case "image", "tiff":
return 0
case "image", _:
case "image", _ if subtype not in ["heic", "webp"]:
return 10
case "video", _:
return 20
case "application", "pdf":
return 20
case "text", _ if subtype in ["csv", "markdown", "xml"]:
return 30
case "text", _:
case "application", "json":
return 40
case "audio", _:
case "application", _ if subtype in ["zip", "x-zip-compressed"]:
return 50
case "application", _:
case _, _: # model, font types, subtypes not covered above
return 60
case _, _: # model, font
return 70

0 comments on commit 64415b9

Please sign in to comment.