Skip to content

Commit

Permalink
feat(airbyte-cdk) Async jobs - Limit memory usage (#46286)
Browse files Browse the repository at this point in the history
  • Loading branch information
maxi297 authored Oct 2, 2024
1 parent 9dc9442 commit b0cac50
Show file tree
Hide file tree
Showing 4 changed files with 53 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

EMPTY_STR: str = ""
DEFAULT_ENCODING: str = "utf-8"
DOWNLOAD_CHUNK_SIZE: int = 1024 * 1024 * 10
DOWNLOAD_CHUNK_SIZE: int = 1024 * 10


class ResponseToFileExtractor(RecordExtractor):
Expand Down
18 changes: 18 additions & 0 deletions airbyte-cdk/python/unit_tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,21 @@ def mock_sleep(monkeypatch):
with freezegun.freeze_time(datetime.datetime.now(), ignore=["_pytest.runner", "_pytest.terminal"]) as frozen_datetime:
monkeypatch.setattr("time.sleep", lambda x: frozen_datetime.tick(x))
yield


def pytest_addoption(parser):
parser.addoption(
"--skipslow", action="store_true", default=False, help="skip slow tests"
)


def pytest_configure(config):
config.addinivalue_line("markers", "slow: mark test as slow to run")


def pytest_collection_modifyitems(config, items):
if config.getoption("--skipslow"):
skip_slow = pytest.mark.skip(reason="--skipslow option has been provided and this test is marked as slow")
for item in items:
if "slow" in item.keywords:
item.add_marker(skip_slow)
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ def large_event_response_fixture():
os.remove(file_path)


@pytest.mark.slow
@pytest.mark.limit_memory("20 MB")
def test_jsonl_decoder_memory_usage(requests_mock, large_events_response):
lines_in_response, file_path = large_events_response
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
import csv
import os
from io import BytesIO
from pathlib import Path
from unittest import TestCase

import pytest
import requests
import requests_mock
from airbyte_cdk.sources.declarative.extractors import ResponseToFileExtractor
Expand Down Expand Up @@ -52,3 +55,33 @@ def _mock_streamed_response(self, io: BytesIO) -> requests.Response:
any_url = "https://anyurl.com"
self._http_mocker.register_uri("GET", any_url, [{"body": io, "status_code": 200}])
return requests.get(any_url)


@pytest.fixture(name="large_events_response")
def large_event_response_fixture():
lines_in_response = 2_000_000 # ≈ 62 MB of response
dir_path = os.path.dirname(os.path.realpath(__file__))
file_path = f"{dir_path}/test_response.csv"
with open(file_path, "w") as csvfile:
csv_writer = csv.writer(csvfile)
csv_writer.writerow(["username", "email"]) # headers
for _ in range(lines_in_response):
csv_writer.writerow(["a_username","[email protected]"])
yield (lines_in_response, file_path)
os.remove(file_path)


@pytest.mark.slow
@pytest.mark.limit_memory("20 MB")
def test_response_to_file_extractor_memory_usage(requests_mock, large_events_response):
lines_in_response, file_path = large_events_response
extractor = ResponseToFileExtractor()

url = "https://for-all-mankind.nasa.com/api/v1/users/users1"
requests_mock.get(url, body=open(file_path, "rb"))

counter = 0
for _ in extractor.extract_records(requests.get(url, stream=True)):
counter += 1

assert counter == lines_in_response

0 comments on commit b0cac50

Please sign in to comment.