🚚 Update project structure
BalconyJH committed Jan 10, 2025
1 parent e57e123 commit 421eff7
Showing 19 changed files with 123 additions and 131 deletions.
32 changes: 0 additions & 32 deletions README.md
@@ -20,48 +20,16 @@ pip install aioarxiv
## Quick Start

```python
import asyncio

from aioarxiv import ArxivClient


async def main():
    async with ArxivClient() as client:
        async for paper in client.search("quantum computing", max_results=1):
            print(f"Title: {paper.title}")
            print(f"Authors: {', '.join(a.name for a in paper.authors)}")
            print(f"Summary: {paper.summary[:200]}...")

            # Download PDF
            file_path = await client.download_paper(paper)
            print(f"Downloaded to: {file_path}")


if __name__ == "__main__":
    asyncio.run(main())
```

## Configuration

```python
from aioarxiv import ArxivConfig, ArxivClient

config = ArxivConfig(
    rate_limit_calls=3,         # Rate limit per window
    rate_limit_period=1.0,      # Window period in seconds
    max_concurrent_requests=3,  # Max concurrent requests
)

client = ArxivClient(config=config)
```
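
With those limits set, the configured client behaves exactly like the default one. A minimal sketch, reusing the `search` API from Quick Start (the query string is illustrative):

```python
import asyncio

from aioarxiv import ArxivClient, ArxivConfig


async def main():
    # At most 3 requests per 1-second window, 3 in flight at once.
    config = ArxivConfig(
        rate_limit_calls=3,
        rate_limit_period=1.0,
        max_concurrent_requests=3,
    )
    async with ArxivClient(config=config) as client:
        async for paper in client.search("quantum computing", max_results=3):
            print(paper.title)


if __name__ == "__main__":
    asyncio.run(main())
```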

## Error Handling

```python
from aioarxiv.exception import SearchCompleteException  # import path assumed

try:
    # `client` is an ArxivClient, as created in the sections above.
    async for paper in client.search("quantum computing"):
        print(paper.title)
except SearchCompleteException:
    print("Search complete")
```

## Requirements
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
66 changes: 49 additions & 17 deletions src/aioarxiv/utils/__init__.py → aioarxiv/utils/__init__.py
@@ -17,17 +17,25 @@

def create_trace_config() -> aiohttp.TraceConfig:
"""
创建请求追踪配置。
Create request tracing configuration.
Returns:
aiohttp.TraceConfig: 请求追踪配置
aiohttp.TraceConfig: Request tracing configuration object.
"""

    async def _on_request_start(
        session: aiohttp.ClientSession,
        trace_config_ctx: SimpleNamespace,
        params: aiohttp.TraceRequestStartParams,
    ) -> None:
        """
        Callback executed when a request starts.

        Args:
            session (aiohttp.ClientSession): The client session.
            trace_config_ctx (SimpleNamespace): Trace configuration context.
            params (aiohttp.TraceRequestStartParams): Request start parameters.
        """
        logger.debug(f"Starting request: {params.method} {params.url}")
        trace_config_ctx.start_time = monotonic()

@@ -36,6 +44,14 @@ async def _on_request_end(
        trace_config_ctx: SimpleNamespace,
        params: aiohttp.TraceRequestEndParams,
    ) -> None:
        """
        Callback executed when a request ends.

        Args:
            session (aiohttp.ClientSession): The client session.
            trace_config_ctx (SimpleNamespace): Trace configuration context.
            params (aiohttp.TraceRequestEndParams): Request end parameters.
        """
        elapsed_time = monotonic() - trace_config_ctx.start_time
        logger.debug(
            f"Ending request: {params.response.status} {params.url} - Time elapsed: "
@@ -56,17 +72,17 @@ def create_parser_exception(
    error: Optional[Exception] = None,
) -> ParserException:
    """
    Create a parsing exception for XML data parsing errors.

    Args:
        data (ET.Element): The data that failed to parse.
        url (Optional[str]): The request URL.
        message (Optional[str], optional): Exception message. Defaults to None.
        namespace (Optional[str], optional): XML namespace. Defaults to None.
        error (Optional[Exception], optional): Original exception. Defaults to None.

    Returns:
        ParserException: The created parsing exception.
    """
    return ParserException(
        url=url or "",
@@ -86,15 +102,15 @@ def calculate_page_size(
    max_results: Optional[int],
) -> int:
    """
    Calculate the page size, constrained by the configured page size and the
    maximum number of results.

    Args:
        config_page_size (int): Configured page size.
        start (int): Starting position.
        max_results (Optional[int]): Maximum number of results.

    Returns:
        int: Calculated page size.
    """
    if max_results is None:
        return config_page_size
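
The remainder of the body is collapsed in this view. A hypothetical re-implementation of the documented clamping, assuming the elided branch limits a page to the results remaining after `start`:

```python
from typing import Optional


def calculate_page_size_sketch(
    config_page_size: int, start: int, max_results: Optional[int]
) -> int:
    # Sketch only -- the committed body is elided above. Without a cap,
    # fall back to the configured page size.
    if max_results is None:
        return config_page_size
    # Never request more than the results remaining after `start`.
    return max(0, min(config_page_size, max_results - start))
```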
@@ -104,13 +120,18 @@

def format_datetime(dt: datetime) -> str:
"""
格式化日期时间。
Format datetime to string.
Args:
dt (datetime): 日期时间
dt (datetime): Datetime object to format.
Returns:
str: 格式化后的日期时间, 格式为: %Y-%m-%d_%H-%M-%S_%Z (2024-03-21_15-30-00_CST)
str: Formatted datetime string in format: %Y-%m-%d_%H-%M-%S_%Z
(e.g., 2024-03-21_15-30-00_CST).
Examples:
>>> format_datetime(datetime(2024, 3, 21, 15, 30, 0))
'2024-03-21_15-30-00_CST'
"""
local_dt = dt.astimezone(ZoneInfo(default_config.timezone))
return local_dt.strftime("%Y-%m-%d_%H-%M-%S_%Z")
@@ -152,6 +173,17 @@ def sanitize_title(title: str, max_length: int = 50) -> str:


def log_retry_attempt(retry_state: RetryCallState) -> None:
"""
Log retry attempt information.
Args:
retry_state (RetryCallState): Current retry state containing attempt
information.
Examples:
>>> log_retry_attempt(RetryCallState(attempt_number=2))
WARNING:root:retry times: 2/3
"""
logger.warning(
f"retry times: {retry_state.attempt_number}/{default_config.max_retries}"
)
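
Since `RetryCallState` comes from tenacity, this helper slots in directly as a retry callback. A minimal sketch, assuming tenacity drives the retries (the decorated function is illustrative, not part of this commit):

```python
from tenacity import retry, stop_after_attempt, wait_fixed

from aioarxiv.utils import log_retry_attempt


@retry(stop=stop_after_attempt(3), wait=wait_fixed(1), before_sleep=log_retry_attempt)
async def flaky_fetch() -> None:
    # Hypothetical failing operation; each failed attempt is logged by the
    # before_sleep hook before the next retry.
    raise ConnectionError("transient failure")
```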
File renamed without changes.
File renamed without changes.
@@ -70,7 +70,7 @@ async def _wait_if_needed(cls, now: float) -> float:
        wait_time = cls.timestamps[0] + cls._period - now
        if wait_time > 0:
            logger.debug(
                f"Rate limit reached, waiting for {wait_time:.2f}s",
                extra={
                    "wait_time": f"{wait_time:.2f}s",
                    "current_calls": len(cls.timestamps),
@@ -112,7 +112,7 @@ async def wrapper(*args: Any, **kwargs: Any) -> Any:
            cls.timestamps.append(now)

            logger.debug(
                "Request passed rate limiter",
                extra={
                    "current_calls": len(cls.timestamps),
                    "max_calls": cls._calls,
File renamed without changes.
17 changes: 17 additions & 0 deletions docs/index.md
@@ -0,0 +1,17 @@
# Welcome to MkDocs

For full documentation visit [mkdocs.org](https://www.mkdocs.org).

## Commands

* `mkdocs new [dir-name]` - Create a new project.
* `mkdocs serve` - Start the live-reloading docs server.
* `mkdocs build` - Build the documentation site.
* `mkdocs -h` - Print help message and exit.

## Project layout

    mkdocs.yml    # The configuration file.
    docs/
        index.md  # The documentation homepage.
        ...       # Other markdown pages, images and other files.
4 changes: 4 additions & 0 deletions mkdocs.yml
@@ -0,0 +1,4 @@
site_name: My Docs
site_url: https://mydomain.org/mysite
theme:
  name: material
103 changes: 47 additions & 56 deletions tests/test_client/test_client.py
@@ -9,8 +9,6 @@
from aioarxiv.client.arxiv_client import ArxivClient
from aioarxiv.models import (
    Metadata,
    SortCriterion,
    SortOrder,
)


@@ -49,7 +47,7 @@ async def test_build_search_metadata(
    # Update the search_result metadata
    search_result = sample_search_result.model_copy(update={"metadata": metadata})

    updated_result = mock_arxiv_client._build_search_result_metadata(
        search_result, page=1, batch_size=10, papers=[sample_paper]
    )

@@ -78,59 +76,52 @@ async def test_metadata_duration_calculation(mock_datetime):
    assert metadata.duration_ms == 1000.000


@pytest.mark.asyncio
async def test_search_with_params(
    mock_arxiv_client, mock_response, mock_session_manager, mock_config
):
    """Test search with parameters."""
    mock_session_manager.request.return_value = mock_response

    params = {
        "query": "neural networks",
        "max_results": 5,
        "sort_by": SortCriterion.SUBMITTED,
        "sort_order": SortOrder.ASCENDING,
    }

    results = []
    async for result in mock_arxiv_client.search(**params):
        results.append(result)

    assert len(results) == 5
    result = results[0]

    assert result.total_result == 218712
    assert result.page == 1
    assert len(result.papers) == 1

    paper = result.papers[0]
    assert paper.info.id == "0102536v1"
    assert (
        paper.info.title
        == "Impact of Electron-Electron Cusp on Configuration Interaction Energies"
    )

    authors = paper.info.authors
    assert len(authors) == 5
    assert authors[0].name == "David Prendergast"
    assert authors[0].affiliation == "Department of Physics"
    assert authors[1].name == "M. Nolan"
    assert authors[1].affiliation == "NMRC, University College, Cork, Ireland"

    assert paper.doi == "10.1063/1.1383585"
    assert paper.journal_ref == "J. Chem. Phys. 115, 1626 (2001)"
    assert "11 pages, 6 figures, 3 tables" in paper.comment
    assert paper.info.categories.primary.term == "cond-mat.str-el"

    call_args = mock_session_manager.request.call_args
    assert call_args is not None
    _, kwargs = call_args

    query_params = kwargs["params"]
    assert query_params["search_query"] == "neural networks"
    assert query_params["max_results"] == mock_config.page_size
    assert query_params["sortBy"] == SortCriterion.SUBMITTED.value
    assert query_params["sortOrder"] == SortOrder.ASCENDING.value
# @pytest.mark.asyncio
# async def test_search_with_params(
# mock_arxiv_client, mock_response, mock_session_manager, mock_config
# ):
# """测试带参数的搜索"""
# mock_session_manager.request.return_value = mock_response
#
# params = {
# "query": "neural networks",
# "max_results": 5,
# "sort_by": SortCriterion.SUBMITTED,
# "sort_order": SortOrder.ASCENDING,
# }
#
# result = await mock_arxiv_client.search(**params)
#
# assert result.total_result == 218712
#
# paper = result.papers[0]
# assert paper.info.id == "0102536v1"
# assert (
# paper.info.title
# == "Impact of Electron-Electron Cusp on Configuration Interaction Energies"
# )
#
# authors = paper.info.authors
# assert len(authors) == 5
# assert authors[0].name == "David Prendergast"
# assert authors[0].affiliation == "Department of Physics"
# assert authors[1].name == "M. Nolan"
# assert authors[1].affiliation == "NMRC, University College, Cork, Ireland"
#
# assert paper.doi == "10.1063/1.1383585"
# assert paper.journal_ref == "J. Chem. Phys. 115, 1626 (2001)"
# assert "11 pages, 6 figures, 3 tables" in paper.comment
# assert paper.info.categories.primary.term == "cond-mat.str-el"
#
# call_args = mock_session_manager.request.call_args
# assert call_args is not None
# _, kwargs = call_args
#
# query_params = kwargs["params"]
# assert query_params["search_query"] == "neural networks"
# assert query_params["max_results"] == mock_config.page_size
# assert query_params["sortBy"] == SortCriterion.SUBMITTED.value
# assert query_params["sortOrder"] == SortOrder.ASCENDING.value


def test_search_result_computed_fields(sample_search_result):
4 changes: 1 addition & 3 deletions tests/test_client/test_downloader.py
@@ -5,9 +5,7 @@
from pytest_mock import MockerFixture

from aioarxiv.client.downloader import ArxivDownloader
from aioarxiv.models import Paper


class MockResponse:
18 changes: 2 additions & 16 deletions tests/test_utils/test_parser.py
@@ -10,8 +10,8 @@
from yarl import URL

from aioarxiv.exception import ParserException
from aioarxiv.models import Category, Paper
from aioarxiv.utils.parser import ArxivParser, PaperParser

SAMPLE_XML_PATH = pathlib.Path(__file__).parent.parent / "data" / "sample.xml"

@@ -114,20 +114,6 @@ def test_parse_datetime():
    assert dt.tzinfo == ZoneInfo("Asia/Shanghai")


def test_root_parser_total_result(sample_xml):
    root = RootParser(sample_xml, URL("http://test.com"))
    assert root.parse_total_result() == 218712


def test_root_parser_build_search_result(sample_xml):
    root = RootParser(sample_xml, URL("http://test.com"))
    params = SearchParams(query="test")  # pyright: ignore [reportCallIssue]
    result = root.build_search_result(params)
    assert result.total_result == 218712
    assert result.page == 1
    assert result.query_params == params


def test_arxiv_parser_build_paper(paper_entry):
    paper = ArxivParser.build_paper(paper_entry)
    assert isinstance(paper, Paper)
