Skip to content

Commit

Permalink
Merge pull request #24 from Wazzaps/bugfix/slow-list
Browse files Browse the repository at this point in the history
make `ampm list` faster
  • Loading branch information
Wazzaps authored Jun 4, 2024
2 parents af21ecf + 48da6fd commit e6f7057
Show file tree
Hide file tree
Showing 4 changed files with 30 additions and 15 deletions.
2 changes: 1 addition & 1 deletion ampm/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = '1.5.5'
__version__ = '1.5.6'
26 changes: 21 additions & 5 deletions ampm/repo/nfs.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,14 @@
import toml
import tqdm
from pyNfsClient import (Portmap, Mount, NFSv3, MNT3_OK, NFS_PROGRAM,
NFS_V3, NFS3_OK, UNCHECKED, NFS3ERR_EXIST, UNSTABLE, NFS3ERR_NOTDIR, NFS3ERR_ISDIR, NFSSTAT3)
NFS_V3, NFS3_OK, UNCHECKED, NFS3ERR_EXIST, UNSTABLE, NFS3ERR_NOTDIR, NFS3ERR_ISDIR, NFSSTAT3, NF3DIR, NFS3ERR_NOTSUPP)

from ampm.repo.base import ArtifactRepo, ArtifactMetadata, ArtifactQuery, QueryNotFoundError, ARTIFACT_TYPES, \
ArtifactCorruptedError, NiceTrySagi
from ampm.repo.local import LOCAL_REPO
from ampm.utils import _calc_dir_size, remove_atexit, LockFile

DEFAULT_CHUNK_SIZE = int(os.environ.get("AMPM_CHUNK_SIZE", str(1024 * 256)))
DEFAULT_CHUNK_SIZE = int(os.environ.get("AMPM_CHUNK_SIZE", str(1024 * 32)))
NFS_OP_TIMEOUT_SEC = 16


Expand Down Expand Up @@ -63,6 +63,7 @@ def __init__(self, host: str, remote_path: str):
self.nfs3: NFSv3 = None
self.root_fh: bytes = None
self.chunk_size_limit = 1024 * 1024 * 1024 # 1 GiB
self.supports_readdirplus = True

self.auth = {
"flavor": 1,
Expand Down Expand Up @@ -234,15 +235,28 @@ def walk_files(self, remote_path: str, include_dirs: bool = False):
cookie = 0
cookie_verf = '0'
while True:
readdir_res = self.nfs3.readdir(fh, cookie=cookie, cookie_verf=cookie_verf)
readdir_res = None
if self.supports_readdirplus:
readdir_res = self.nfs3.readdirplus(fh, cookie=cookie, cookie_verf=cookie_verf)
if readdir_res["status"] == NFS3ERR_NOTSUPP:
self.supports_readdirplus = False

if not self.supports_readdirplus:
readdir_res = self.nfs3.readdir(fh, cookie=cookie, cookie_verf=cookie_verf)

if readdir_res["status"] == NFS3_OK:
if include_dirs:
yield remote_path
cookie_verf = readdir_res["resok"]["cookieverf"]
entry = readdir_res["resok"]["reply"]["entries"]
while entry:
if not entry[0]['name'].startswith(b'.'):
yield from self.walk_files(remote_path + '/' + entry[0]['name'].decode(), include_dirs)
next_path = remote_path + '/' + entry[0]['name'].decode()
if ('name_attributes' in entry[0] and
entry[0]['name_attributes']['attributes']['type'] != NF3DIR):
yield next_path
else:
yield from self.walk_files(next_path, include_dirs)
cookie = entry[0]['cookie']
entry = entry[0]['nextentry']
if readdir_res["resok"]["reply"]["eof"]:
Expand All @@ -251,7 +265,7 @@ def walk_files(self, remote_path: str, include_dirs: bool = False):
yield remote_path
return
else:
raise IOError(f"NFS readdir failed: code={readdir_res['status']} ({NFSSTAT3[readdir_res['status']]})")
raise IOError(f"NFS readdirplus failed: code={readdir_res['status']} ({NFSSTAT3[readdir_res['status']]})")

def walk_files_dirs_at_end(self, remote_path: str):
"""Transform the output of `walk_files` such that the dirs appear after their contents"""
Expand Down Expand Up @@ -645,6 +659,8 @@ def download_metadata_for_type(self, artifact_type: str):
type_extra = matches.group(1)
artifact_hash = matches.group(2)
local_path = LOCAL_REPO.metadata_path_of(artifact_type + type_extra, artifact_hash, '.toml')
if local_path.exists():
continue
tmp_local_path = LOCAL_REPO.metadata_path_of(
artifact_type + type_extra,
artifact_hash,
Expand Down
15 changes: 7 additions & 8 deletions ampm/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,15 +30,14 @@ def _calc_dir_size(path: Path) -> int:

def hash_local_file(local_path: Path) -> str:
    """Return the hex-encoded SHA-256 digest of the file at *local_path*.

    The file is streamed in 1 MiB chunks so arbitrarily large files can be
    hashed with constant memory.

    :param local_path: Path of an existing, readable file.
    :return: 64-character lowercase hex SHA-256 digest of the file contents.
    :raises OSError: If the file cannot be opened or read.
    """
    hasher = hashlib.sha256(b'')

    # Context manager guarantees the descriptor is closed even if read() raises.
    with local_path.open('rb') as fd:
        while True:
            chunk = fd.read(1024 * 1024)
            if len(chunk) == 0:
                break
            hasher.update(chunk)

    return hasher.hexdigest()


def randbytes(length: int) -> bytes:
Expand Down
2 changes: 1 addition & 1 deletion tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -539,7 +539,7 @@ def test_stress(clean_repos, upload, list_, download):
assert len(artifacts) == COUNT, f"Listing {COUNT} artifacts returned {len(artifacts)} instead"
list_duration = time.time() - t
print(f'Listed {COUNT} artifacts in {list_duration} seconds')
assert list_duration < 30, f"Listing {COUNT} artifacts took too long"
assert list_duration < 10, f"Listing {COUNT} artifacts took too long"


@pytest.mark.parametrize('is_compressed', ['compressed', 'uncompressed'])
Expand Down

0 comments on commit e6f7057

Please sign in to comment.