From ddf6fea23fab243048eb3840633cd6c96777e865 Mon Sep 17 00:00:00 2001 From: deedy5 <65482418+deedy5@users.noreply.github.com> Date: Tue, 16 Apr 2024 17:41:26 +0300 Subject: [PATCH] Add benchmark --- README.md | 6 ++- benchmark/1_threads.csv | 6 +++ benchmark/4_threads.csv | 6 +++ benchmark/README.md | 17 +++++++++ benchmark/benchmark.py | 77 ++++++++++++++++++++++++++++++++++++++ benchmark/requirements.txt | 8 ++++ benchmark/server.py | 29 ++++++++++++++ 7 files changed, 147 insertions(+), 2 deletions(-) create mode 100644 benchmark/1_threads.csv create mode 100644 benchmark/4_threads.csv create mode 100644 benchmark/README.md create mode 100644 benchmark/benchmark.py create mode 100644 benchmark/requirements.txt create mode 100644 benchmark/server.py diff --git a/README.md b/README.md index 4324873..9e2b425 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,10 @@ ![Python >= 3.8](https://img.shields.io/badge/python->=3.8-red.svg) [![](https://badgen.net/github/release/deedy5/pyreqwest-impersonate)](https://github.com/deedy5/pyreqwest-impersonate/releases) [![](https://badge.fury.io/py/pyreqwest_impersonate.svg)](https://pypi.org/project/pyreqwest_impersonate) [![Downloads](https://static.pepy.tech/badge/pyreqwest_impersonate/week)](https://pepy.tech/project/pyreqwest_impersonate) [![CI](https://github.com/deedy5/pyreqwest-impersonate/actions/workflows/CI.yml/badge.svg?branch=main)](https://github.com/deedy5/pyreqwest-impersonate/actions/workflows/CI.yml) # Pyreqwest_impersonate -HTTP client that can impersonate web browsers, mimicking their headers and `TLS/JA3/JA4/HTTP2` fingerprints.
-Binding to the Rust `reqwest_impersonate` library. +The fastest Python HTTP client that can impersonate web browsers by mimicking their headers and `TLS/JA3/JA4/HTTP2` fingerprints.
+Binding to the Rust `reqwest_impersonate` library.
+🏁 Check the benchmarks for more details. + Provides precompiled wheels: - [x] Linux: `amd64`, `aarch64`. diff --git a/benchmark/1_threads.csv b/benchmark/1_threads.csv new file mode 100644 index 0000000..8e05bc9 --- /dev/null +++ b/benchmark/1_threads.csv @@ -0,0 +1,6 @@ +name,threads,cpu_time 50k,cpu_time 5k,duration 50k,duration 5k +curl_cffi,1,5.735,1.521,7.957,3.25 +httpx,1,3.801,2.116,6.117,3.987 +pyreqwest_impersonate,1,0.855,0.297,1.977,1.238 +requests,1,5.787,2.814,8.355,4.73 +tls_client,1,6.414,1.96,6.941,3.153 diff --git a/benchmark/4_threads.csv b/benchmark/4_threads.csv new file mode 100644 index 0000000..b79c28d --- /dev/null +++ b/benchmark/4_threads.csv @@ -0,0 +1,6 @@ +name,threads,cpu_time 50k,cpu_time 5k,duration 50k,duration 5k +curl_cffi,4,4.014,1.255,1.567,0.866 +httpx,4,2.105,1.461,1.505,1.307 +pyreqwest_impersonate,4,1.15,0.399,0.875,0.751 +requests,4,4.14,3.006,3.356,2.802 +tls_client,4,3.803,1.357,1.382,0.832 diff --git a/benchmark/README.md b/benchmark/README.md new file mode 100644 index 0000000..a6f6e6e --- /dev/null +++ b/benchmark/README.md @@ -0,0 +1,17 @@ +## Benchmark + +Benchmark between `pyreqwest_impersonate` and other Python HTTP clients: + +- curl_cffi +- httpx +- pyreqwest_impersonate +- python-tls-client +- requests + +All the clients run with session/client enabled. +Server response is gzipped. 
+ +#### Run benchmark: + +- run server: `uvicorn server:app` +- run benchmark: `python benchmark.py` diff --git a/benchmark/benchmark.py b/benchmark/benchmark.py new file mode 100644 index 0000000..801ea34 --- /dev/null +++ b/benchmark/benchmark.py @@ -0,0 +1,77 @@ +import time +from concurrent.futures import ThreadPoolExecutor, as_completed +import pandas as pd +import requests +import httpx +import tls_client +import pyreqwest_impersonate +import curl_cffi.requests + +results = [] + + +def session_get_test(session_class, requests_number): + s = session_class() + for _ in range(requests_number): + s.get(url).text + +# one thread +requests_number = 2000 +for response_size in ["5k", "50k"]: + url = f"http://127.0.0.1:8000/{response_size}" + print(f"\nOne worker, {response_size=}, {requests_number=}") + for name, session_class in [ + ("requests", requests.Session), + ("httpx", httpx.Client), + ("tls_client", tls_client.Session), + ("curl_cffi", curl_cffi.requests.Session), + ("pyreqwest_impersonate", pyreqwest_impersonate.Client), + ]: + start = time.perf_counter() + cpu_start = time.process_time() + session_get_test(session_class, requests_number) + dur = round(time.perf_counter() - start, 3) + cpu_dur = round(time.process_time() - cpu_start, 3) + results.append({"name": name, "threads": 1, "response_size": response_size, "duration": dur, "cpu_time": cpu_dur}) + print(f" name: {name:<22} {response_size=} {dur=} {cpu_dur=}") + + +# multiple threads +requests_number = 500 +threads_number = 4 +for response_size in ["5k", "50k"]: + url = f"http://127.0.0.1:8000/{response_size}" + print(f"\n{threads_number} workers, {response_size=}, {requests_number=}") + for name, session_class in [ + ("requests", requests.Session), + ("httpx", httpx.Client), + ("tls_client", tls_client.Session), + ("curl_cffi", curl_cffi.requests.Session), + ("pyreqwest_impersonate", pyreqwest_impersonate.Client), + ]: + start = time.perf_counter() + cpu_start = time.process_time() + with 
ThreadPoolExecutor(threads_number) as executor: + futures = [executor.submit(session_get_test, session_class, requests_number) for _ in range(threads_number)] + for f in as_completed(futures): + f.result() + dur = round(time.perf_counter() - start, 3) + cpu_dur = round(time.process_time() - cpu_start, 3) + results.append({"name": name, "threads": threads_number, "response_size": response_size, "duration": dur, "cpu_time": cpu_dur}) + print(f" name: {name:<22} {response_size=} {dur=} {cpu_dur=}") + + +df = pd.DataFrame(results) +pivot_df = df.pivot_table(index=['name', 'threads'], columns='response_size', values=['duration', 'cpu_time'], aggfunc='mean') +pivot_df.reset_index(inplace=True) +pivot_df.columns = [' '.join(col).strip() for col in pivot_df.columns.values] +pivot_df = pivot_df[['name', 'threads'] + [col for col in pivot_df.columns if col not in ['name', 'threads']]] +unique_threads = pivot_df['threads'].unique() +for thread in unique_threads: + thread_df = pivot_df[pivot_df['threads'] == thread] + print(f"\nTable for {thread} threads:") + print(thread_df.to_string(index=False)) + thread_df.to_csv(f'{thread}_threads.csv', index=False) + + + diff --git a/benchmark/requirements.txt b/benchmark/requirements.txt new file mode 100644 index 0000000..0d38cd6 --- /dev/null +++ b/benchmark/requirements.txt @@ -0,0 +1,8 @@ +pandas +starlette +uvicorn +requests +httpx +tls-client +pyreqwest_impersonate +curl_cffi diff --git a/benchmark/server.py b/benchmark/server.py new file mode 100644 index 0000000..34c296c --- /dev/null +++ b/benchmark/server.py @@ -0,0 +1,29 @@ +import base64 +import os +import gzip +from starlette.applications import Starlette +from starlette.responses import Response +from starlette.routing import Route + +random_5k = base64.b64encode(os.urandom(5 * 1024)).decode('utf-8') +random_5k = gzip.compress(random_5k.encode('utf-8')) + +random_50k = base64.b64encode(os.urandom(50 * 1024)).decode('utf-8') +random_50k = 
gzip.compress(random_50k.encode('utf-8')) + + +def gzip_response(gzipped_content): + headers = { + 'Content-Encoding': 'gzip', + 'Content-Length': str(len(gzipped_content)), + } + return Response(gzipped_content, headers=headers) + +app = Starlette( + routes=[ + Route("/5k", lambda r: gzip_response(random_5k)), + Route("/50k", lambda r: gzip_response(random_50k)), + ], +) + +# Run server: uvicorn server:app