From ddf6fea23fab243048eb3840633cd6c96777e865 Mon Sep 17 00:00:00 2001
From: deedy5 <65482418+deedy5@users.noreply.github.com>
Date: Tue, 16 Apr 2024 17:41:26 +0300
Subject: [PATCH] Add benchmark

---
 README.md                  |  6 ++-
 benchmark/1_threads.csv    |  6 +++
 benchmark/4_threads.csv    |  6 +++
 benchmark/README.md        | 17 +++++++++
 benchmark/benchmark.py     | 77 ++++++++++++++++++++++++++++++++++++++
 benchmark/requirements.txt |  8 ++++
 benchmark/server.py        | 29 ++++++++++++++
 7 files changed, 147 insertions(+), 2 deletions(-)
 create mode 100644 benchmark/1_threads.csv
 create mode 100644 benchmark/4_threads.csv
 create mode 100644 benchmark/README.md
 create mode 100644 benchmark/benchmark.py
 create mode 100644 benchmark/requirements.txt
 create mode 100644 benchmark/server.py
diff --git a/README.md b/README.md
index 4324873..9e2b425 100644
--- a/README.md
+++ b/README.md
@@ -1,8 +1,10 @@
 ![Python >= 3.8](https://img.shields.io/badge/python->=3.8-red.svg) [![](https://badgen.net/github/release/deedy5/pyreqwest-impersonate)](https://github.com/deedy5/pyreqwest-impersonate/releases) [![](https://badge.fury.io/py/pyreqwest_impersonate.svg)](https://pypi.org/project/pyreqwest_impersonate) [![Downloads](https://static.pepy.tech/badge/pyreqwest_impersonate/week)](https://pepy.tech/project/pyreqwest_impersonate) [![CI](https://github.com/deedy5/pyreqwest-impersonate/actions/workflows/CI.yml/badge.svg?branch=main)](https://github.com/deedy5/pyreqwest-impersonate/actions/workflows/CI.yml)
 # Pyreqwest_impersonate
 
-HTTP client that can impersonate web browsers, mimicking their headers and `TLS/JA3/JA4/HTTP2` fingerprints.</br>
-Binding to the Rust `reqwest_impersonate` library.
+The fastest Python HTTP client that can impersonate web browsers by mimicking their headers and `TLS/JA3/JA4/HTTP2` fingerprints.<br>
+Binding to the Rust `reqwest_impersonate` library.<br>
+🏁 Check the [benchmarks](https://github.com/deedy5/pyreqwest-impersonate/tree/main/benchmark) for more details.
+
 
 Provides precompiled wheels:
 - [x] Linux:  `amd64`, `aarch64`.
diff --git a/benchmark/1_threads.csv b/benchmark/1_threads.csv
new file mode 100644
index 0000000..8e05bc9
--- /dev/null
+++ b/benchmark/1_threads.csv
@@ -0,0 +1,6 @@
+name,threads,cpu_time 50k,cpu_time 5k,duration 50k,duration 5k
+curl_cffi,1,5.735,1.521,7.957,3.25
+httpx,1,3.801,2.116,6.117,3.987
+pyreqwest_impersonate,1,0.855,0.297,1.977,1.238
+requests,1,5.787,2.814,8.355,4.73
+tls_client,1,6.414,1.96,6.941,3.153
diff --git a/benchmark/4_threads.csv b/benchmark/4_threads.csv
new file mode 100644
index 0000000..b79c28d
--- /dev/null
+++ b/benchmark/4_threads.csv
@@ -0,0 +1,6 @@
+name,threads,cpu_time 50k,cpu_time 5k,duration 50k,duration 5k
+curl_cffi,4,4.014,1.255,1.567,0.866
+httpx,4,2.105,1.461,1.505,1.307
+pyreqwest_impersonate,4,1.15,0.399,0.875,0.751
+requests,4,4.14,3.006,3.356,2.802
+tls_client,4,3.803,1.357,1.382,0.832
diff --git a/benchmark/README.md b/benchmark/README.md
new file mode 100644
index 0000000..a6f6e6e
--- /dev/null
+++ b/benchmark/README.md
@@ -0,0 +1,17 @@
+## Benchmark
+
+Benchmark between `pyreqwest_impersonate` and other Python HTTP clients:
+
+- curl_cffi
+- httpx
+- pyreqwest_impersonate
+- python-tls-client
+- requests
+
+All the clients run with session/client enabled.
+Server response is gzipped.
+
+#### Run benchmark:
+    
+- run server: `uvicorn server:app`
+- run benchmark: `python benchmark.py`
diff --git a/benchmark/benchmark.py b/benchmark/benchmark.py
new file mode 100644
index 0000000..801ea34
--- /dev/null
+++ b/benchmark/benchmark.py
@@ -0,0 +1,77 @@
+import time
+from concurrent.futures import ThreadPoolExecutor, as_completed
+import pandas as pd
+import requests
+import httpx
+import tls_client
+import pyreqwest_impersonate
+import curl_cffi.requests
+
+results = []
+
+
+def session_get_test(session_class, requests_number):  # issue `requests_number` sequential GETs through one fresh session/client
+    s = session_class()  # one instance reused for every request below; NOTE(review): it is never closed
+    for _ in range(requests_number):
+        s.get(url).text  # `url` is the module-level global set by the benchmark loops; .text is accessed and discarded (presumably so each client decodes the body - confirm)
+
+# one thread: every client issues all of its requests sequentially from the main thread
+requests_number = 2000
+for response_size in ["5k", "50k"]:  # same names as the /5k and /50k routes in server.py
+    url = f"http://127.0.0.1:8000/{response_size}"  # module-level global read by session_get_test
+    print(f"\nOne worker, {response_size=}, {requests_number=}")
+    for name, session_class in [
+        ("requests", requests.Session),
+        ("httpx", httpx.Client),
+        ("tls_client", tls_client.Session),
+        ("curl_cffi", curl_cffi.requests.Session),
+        ("pyreqwest_impersonate", pyreqwest_impersonate.Client),
+    ]:
+        start = time.perf_counter()  # wall-clock reference
+        cpu_start = time.process_time()  # CPU-time reference for this process
+        session_get_test(session_class, requests_number)
+        dur = round(time.perf_counter() - start, 3)  # elapsed wall-clock seconds
+        cpu_dur = round(time.process_time() - cpu_start, 3)  # CPU seconds consumed by this process
+        results.append({"name": name, "threads": 1, "response_size": response_size, "duration": dur, "cpu_time": cpu_dur})
+        print(f"    name: {name:<22} {response_size=} {dur=} {cpu_dur=}")
+
+
+# multiple threads: each worker runs its own session_get_test call, so each has its own session
+requests_number = 500  # per worker; 4 workers x 500 = 2000 requests, the same total as the one-thread run
+threads_number = 4
+for response_size in ["5k", "50k"]:
+    url = f"http://127.0.0.1:8000/{response_size}"  # module-level global read by session_get_test in every worker
+    print(f"\n{threads_number} workers, {response_size=}, {requests_number=}")
+    for name, session_class in [
+        ("requests", requests.Session),
+        ("httpx", httpx.Client),
+        ("tls_client", tls_client.Session),
+        ("curl_cffi", curl_cffi.requests.Session),
+        ("pyreqwest_impersonate", pyreqwest_impersonate.Client),
+    ]:
+        start = time.perf_counter()  # timing includes thread-pool start-up and shutdown
+        cpu_start = time.process_time()
+        with ThreadPoolExecutor(threads_number) as executor:
+            futures = [executor.submit(session_get_test, session_class, requests_number) for _ in range(threads_number)]
+            for f in as_completed(futures):
+                f.result()  # re-raises any exception raised inside a worker
+        dur = round(time.perf_counter() - start, 3)
+        cpu_dur = round(time.process_time() - cpu_start, 3)
+        results.append({"name": name, "threads": threads_number, "response_size": response_size, "duration": dur, "cpu_time": cpu_dur})
+        print(f"    name: {name:<22} {response_size=} {dur=} {cpu_dur=}")
+ 
+
+df = pd.DataFrame(results)  # one row per (client, thread count, response size) measurement
+pivot_df = df.pivot_table(index=['name', 'threads'], columns='response_size', values=['duration', 'cpu_time'], aggfunc='mean')  # one row per (name, threads); columns keyed by (metric, response_size)
+pivot_df.reset_index(inplace=True)
+pivot_df.columns = [' '.join(col).strip() for col in pivot_df.columns.values]  # flatten two-level column labels, e.g. "cpu_time 50k"; strip() removes the trailing space left on "name"/"threads"
+pivot_df = pivot_df[['name', 'threads'] + [col for col in pivot_df.columns if col not in ['name', 'threads']]]  # put the identifying columns first
+unique_threads = pivot_df['threads'].unique()
+for thread in unique_threads:  # one table and one CSV per thread count
+    thread_df = pivot_df[pivot_df['threads'] == thread]
+    print(f"\nTable for {thread} threads:")
+    print(thread_df.to_string(index=False))
+    thread_df.to_csv(f'{thread}_threads.csv', index=False)  # relative path, e.g. 1_threads.csv in the current working directory
+
+
+
diff --git a/benchmark/requirements.txt b/benchmark/requirements.txt
new file mode 100644
index 0000000..0d38cd6
--- /dev/null
+++ b/benchmark/requirements.txt
@@ -0,0 +1,8 @@
+pandas
+starlette
+uvicorn
+requests
+httpx
+tls-client
+pyreqwest_impersonate
+curl_cffi
diff --git a/benchmark/server.py b/benchmark/server.py
new file mode 100644
index 0000000..34c296c
--- /dev/null
+++ b/benchmark/server.py
@@ -0,0 +1,29 @@
+import base64
+import os
+import gzip
+from starlette.applications import Starlette
+from starlette.responses import Response
+from starlette.routing import Route
+
+random_5k = base64.b64encode(os.urandom(5 * 1024)).decode('utf-8')  # base64 text of 5 KiB of random bytes, generated once at import
+random_5k = gzip.compress(random_5k.encode('utf-8'))  # name is rebound to the gzip-compressed bytes that are actually served
+
+random_50k = base64.b64encode(os.urandom(50 * 1024)).decode('utf-8')  # base64 text of 50 KiB of random bytes
+random_50k = gzip.compress(random_50k.encode('utf-8'))  # rebound to the gzip-compressed bytes
+
+
+def gzip_response(gzipped_content):  # wrap already-gzipped bytes in a Response with explicit encoding/length headers
+    headers = {
+        'Content-Encoding': 'gzip',  # the caller passes pre-compressed bytes; nothing is compressed here
+        'Content-Length': str(len(gzipped_content)),  # length of the compressed bytes
+    }
+    return Response(gzipped_content, headers=headers)
+
+app = Starlette(  # application object referenced as `server:app` when starting uvicorn
+    routes=[
+        Route("/5k", lambda r: gzip_response(random_5k)),  # request argument `r` is unused; the same precomputed body is returned on every call
+        Route("/50k", lambda r: gzip_response(random_50k)),
+    ],
+)
+
+# Run server: uvicorn server:app