PUBDEV-6556 - S3 tests failing non-deterministically in hadoop pipeline (h2oai#3579)

* PUBDEV-6556 - S3 test fails due to the exported file not yet existing in S3 when it is read back (see the waiter sketch below)

* S3 HDP import/export test uses sub-second precision (strftime %f, i.e. microseconds) in exported file names
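For background, the fix rests on boto3's built-in 'object_exists' waiter, which polls S3 with HeadObject until the key becomes visible, instead of importing the file immediately and tripping over S3's indexing delay. A minimal sketch of the pattern, with a placeholder bucket and key rather than the test's real ones:

import boto3

# Block until a freshly uploaded object is visible in S3.
# 'my-test-bucket' and the key are placeholders for illustration.
client = boto3.client('s3')
client.get_waiter('object_exists').wait(
    Bucket='my-test-bucket',
    Key='exports/exported.example.csv.zip',
    WaiterConfig={'Delay': 2, 'MaxAttempts': 10}  # poll every 2 s, at most 10 tries
)

If the object never shows up, wait() raises botocore.exceptions.WaiterError, so the test fails with a clear cause instead of a sporadic import error.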
Pavel Pscheidl authored Jun 10, 2019
1 parent ef6f830 commit 73b5d9e
Showing 2 changed files with 27 additions and 7 deletions.
h2o-hadoop-2/tests/python/pyunit_s3_import_export.py (16 changes: 13 additions & 3 deletions)
@@ -12,16 +12,26 @@
 def s3_import_export():
     local_frame = h2o.import_file(path=pyunit_utils.locate("smalldata/logreg/prostate.csv"))
     for scheme in ["s3n", "s3a"]:
-        timestamp = datetime.today().utcnow().strftime("%Y%m%d-%H%M%S")
+        timestamp = datetime.today().utcnow().strftime("%Y%m%d-%H%M%S.%f")
         unique_suffix = str(uuid.uuid4())
         s3_path = scheme + "://test.0xdata.com/h2o-hadoop-tests/test-export/" + scheme + "/exported." + \
                   timestamp + "." + unique_suffix + ".csv.zip"
         h2o.export_file(local_frame, s3_path)
 
-        s3 = boto3.resource('s3')
+        client = boto3.client('s3')
+        # S3 might delay indexing the file (usually by milliseconds, up to hundreds of milliseconds).
+        # Wait for the file to become available; if it is not there at the beginning, retry every 2 seconds, up to 10 times.
+        client.get_waiter('object_exists').wait(Bucket='test.0xdata.com',
+                                                Key="h2o-hadoop-tests/test-export/" + scheme + "/exported." + \
+                                                    timestamp + "." + unique_suffix + ".csv.zip",
+                                                WaiterConfig={
+                                                    'Delay': 2,
+                                                    'MaxAttempts': 10
+                                                })
         s3_frame = h2o.import_file(s3_path)
         assert_frame_equal(local_frame.as_data_frame(), s3_frame.as_data_frame())
 
+        # Delete the file afterwards
+        s3 = boto3.resource('s3')
         s3.Object(bucket_name='test.0xdata.com', key="h2o-hadoop-tests/test-export/" + scheme + "/exported." + \
                   timestamp + "." + unique_suffix + ".csv.zip").delete()

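A note on the renamed exports: strftime's %f field is microseconds, so the new pattern has sub-second resolution, and together with the uuid4 suffix it makes name collisions between concurrent pipeline runs practically impossible. A quick illustration (example output, not from the commit):

from datetime import datetime

# %f appends microseconds, so two exports within the same second still get distinct names.
ts = datetime.utcnow().strftime("%Y%m%d-%H%M%S.%f")
print(ts)  # e.g. 20190610-142501.873214

Incidentally, the chained datetime.today().utcnow() in the tests is equivalent to plain datetime.utcnow(): utcnow() is a classmethod, so the instance returned by today() is ignored.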
h2o-hadoop-3/tests/python/pyunit_s3_import_export.py (18 changes: 14 additions & 4 deletions)
@@ -11,17 +11,27 @@
 
 def s3_import_export():
     local_frame = h2o.import_file(path=pyunit_utils.locate("smalldata/logreg/prostate.csv"))
-    for scheme in ["s3a"]:  # s3n is deprecated since HDP3/CDH6
-        timestamp = datetime.today().utcnow().strftime("%Y%m%d-%H%M%S")
+    for scheme in ["s3a"]:  # s3n is deprecated since HDP3/CDH6
+        timestamp = datetime.today().utcnow().strftime("%Y%m%d-%H%M%S.%f")
         unique_suffix = str(uuid.uuid4())
         s3_path = scheme + "://test.0xdata.com/h2o-hadoop-tests/test-export/" + scheme + "/exported." + \
                   timestamp + "." + unique_suffix + ".csv.zip"
         h2o.export_file(local_frame, s3_path)
 
-        s3 = boto3.resource('s3')
+        client = boto3.client('s3')
+        # S3 might delay indexing the file (usually by milliseconds, up to hundreds of milliseconds).
+        # Wait for the file to become available; if it is not there at the beginning, retry every 2 seconds, up to 10 times.
+        client.get_waiter('object_exists').wait(Bucket='test.0xdata.com',
+                                                Key="h2o-hadoop-tests/test-export/" + scheme + "/exported." + \
+                                                    timestamp + "." + unique_suffix + ".csv.zip",
+                                                WaiterConfig={
+                                                    'Delay': 2,
+                                                    'MaxAttempts': 10
+                                                })
         s3_frame = h2o.import_file(s3_path)
         assert_frame_equal(local_frame.as_data_frame(), s3_frame.as_data_frame())
 
+        # Delete the file afterwards
+        s3 = boto3.resource('s3')
         s3.Object(bucket_name='test.0xdata.com', key="h2o-hadoop-tests/test-export/" + scheme + "/exported." + \
                   timestamp + "." + unique_suffix + ".csv.zip").delete()

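The same guard can be spelled out without the waiter API; the loop below is roughly what the object_exists waiter does internally, shown here only as a hedged sketch (the helper name and structure are mine, not the commit's):

import time
import boto3
from botocore.exceptions import ClientError

def wait_for_object(bucket, key, delay=2, max_attempts=10):
    # Poll HeadObject until the key is visible or attempts run out.
    client = boto3.client('s3')
    for _ in range(max_attempts):
        try:
            client.head_object(Bucket=bucket, Key=key)
            return True
        except ClientError as e:
            # HeadObject reports a missing key as a 404 ClientError.
            if e.response['Error']['Code'] != '404':
                raise  # unrelated failure, surface it
            time.sleep(delay)
    return False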
