Skip to content

Commit

Permalink
Remove sonic_rs and switch to serde_json. (#1286)
Browse files Browse the repository at this point in the history
`sonic_rs` has a serious perf regression in a recent release, and
doesn't seem particularly reliable as a library. Our perf-sensitive code
is now running `orjson`, so the bits still using `sonic_rs` aren't very
hot anymore. Let's switch to `serde_json` for easier maintainability.
  • Loading branch information
obi1kenobi authored Dec 4, 2024
1 parent 8b13016 commit 3bd8258
Show file tree
Hide file tree
Showing 12 changed files with 445 additions and 216 deletions.
577 changes: 404 additions & 173 deletions rust/Cargo.lock

Large diffs are not rendered by default.

1 change: 0 additions & 1 deletion rust/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ futures = "0.3.31"
rayon = "1.10.0"
serde = { version = "1.0.210", features = ["derive"] }
serde_json = "1.0.128"
sonic-rs = "0.3.14"
tempfile = "3.13.0"
thiserror = "1.0"
tokio = { version = "1", features = ["full"] }
Expand Down
2 changes: 1 addition & 1 deletion rust/crates/langsmith-pyo3/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@ crate-type = ["cdylib", "rlib"]
[dependencies]
pyo3 = { path = "../../../vendor/pyo3" }
pyo3-ffi = { path = "../../../vendor/pyo3/pyo3-ffi" }
sonic-rs = { workspace = true }
serde = { workspace = true }
serde_json = { workspace = true }
langsmith-tracing-client = { path = "../langsmith-tracing-client" }
orjson = { path = "../../../vendor/orjson", default-features = false }

Expand Down
50 changes: 25 additions & 25 deletions rust/crates/langsmith-pyo3/src/py_run.rs
Original file line number Diff line number Diff line change
Expand Up @@ -118,10 +118,10 @@ impl FromPyObject<'_> for RunCreate {

let run_type =
value.get_item(pyo3::intern!(value.py(), "run_type"))?.extract::<String>()?;
let reference_example_id = extract_string_like_or_none(&get_optional_value_from_mapping(
let reference_example_id = extract_string_like_or_none(get_optional_value_from_mapping(
value,
pyo3::intern!(value.py(), "reference_example_id"),
))?;
).as_ref())?;

Ok(Self(langsmith_tracing_client::client::RunCreate {
common,
Expand Down Expand Up @@ -151,32 +151,32 @@ impl FromPyObject<'_> for RunCommon {
extract_string_like(&value.get_item(pyo3::intern!(value.py(), "trace_id"))?)?;

let dotted_order = value.get_item(pyo3::intern!(value.py(), "dotted_order"))?.extract()?;
let parent_run_id = extract_string_like_or_none(&get_optional_value_from_mapping(
let parent_run_id = extract_string_like_or_none(get_optional_value_from_mapping(
value,
pyo3::intern!(value.py(), "parent_run_id"),
))?;
).as_ref())?;

let extra = extract_optional_value_from_mapping(value, pyo3::intern!(value.py(), "extra"))?;

let error = extract_string_like_or_none(&get_optional_value_from_mapping(
let error = extract_string_like_or_none(get_optional_value_from_mapping(
value,
pyo3::intern!(value.py(), "error"),
))?;
).as_ref())?;

let serialized =
extract_optional_value_from_mapping(value, pyo3::intern!(value.py(), "serialized"))?;
let events =
extract_optional_value_from_mapping(value, pyo3::intern!(value.py(), "events"))?;
let tags = extract_optional_value_from_mapping(value, pyo3::intern!(value.py(), "tags"))?;

let session_id = extract_string_like_or_none(&get_optional_value_from_mapping(
let session_id = extract_string_like_or_none(get_optional_value_from_mapping(
value,
pyo3::intern!(value.py(), "session_id"),
))?;
let session_name = extract_string_like_or_none(&get_optional_value_from_mapping(
).as_ref())?;
let session_name = extract_string_like_or_none(get_optional_value_from_mapping(
value,
pyo3::intern!(value.py(), "session_name"),
))?;
).as_ref())?;

Ok(Self(langsmith_tracing_client::client::RunCommon {
id,
Expand All @@ -195,7 +195,7 @@ impl FromPyObject<'_> for RunCommon {
}

/// Get an optional string from a Python `None`, string, or string-like object such as a UUID value.
fn extract_string_like_or_none(value: &Option<Bound<'_, PyAny>>) -> PyResult<Option<String>> {
fn extract_string_like_or_none(value: Option<&Bound<'_, PyAny>>) -> PyResult<Option<String>> {
match value {
None => Ok(None),
Some(val) if val.is_none() => Ok(None),
Expand Down Expand Up @@ -276,7 +276,7 @@ fn serialize_optional_dict_value(
fn extract_optional_value_from_mapping(
mapping: &Bound<'_, PyAny>,
key: &Bound<'_, PyString>,
) -> PyResult<Option<sonic_rs::Value>> {
) -> PyResult<Option<serde_json::Value>> {
match mapping.get_item(key) {
Ok(value) => {
if value.is_none() {
Expand All @@ -288,45 +288,45 @@ fn extract_optional_value_from_mapping(
}
}

fn extract_value(value: &Bound<'_, PyAny>) -> PyResult<sonic_rs::Value> {
fn extract_value(value: &Bound<'_, PyAny>) -> PyResult<serde_json::Value> {
if value.is_none() {
Ok(sonic_rs::Value::new())
Ok(serde_json::Value::Null)
} else if let Ok(number) = value.extract::<i64>() {
Ok(number.into())
} else if let Ok(number) = value.extract::<u64>() {
Ok(number.into())
} else if let Ok(float) = value.extract::<f64>() {
Ok(sonic_rs::Number::try_from(float).map(sonic_rs::Value::from).unwrap_or_default())
Ok(serde_json::Number::from_f64(float).map(serde_json::Value::from).unwrap_or_default())
} else if let Ok(string) = value.extract::<&str>() {
Ok(string.into())
} else if let Ok(bool) = value.extract::<bool>() {
Ok(bool.into())
} else if let Ok(sequence) = value.downcast::<PySequence>() {
let mut array = sonic_rs::Array::with_capacity(sequence.len()?);
let mut array = Vec::with_capacity(sequence.len()?);

for elem in sequence.iter()? {
array.push(extract_value(&elem?)?);
}

Ok(array.into_value())
Ok(serde_json::Value::Array(array))
} else if let Ok(mapping) = value.downcast::<PyMapping>() {
let mut dict = sonic_rs::Object::with_capacity(mapping.len()?);
let mut dict = serde_json::Map::with_capacity(mapping.len()?);

for result in mapping.items()?.iter()? {
let key_value_pair = result?;

// Sonic wants all object keys to be strings,
// so we'll error on non-string dict keys.
let key_item = key_value_pair.get_item(0)?;
let value = extract_value(&key_value_pair.get_item(1)?)?;

// sonic_rs 0.3.14 doesn't allow `K: ?Sized` for the `&K` key,
// so we can't extract `&str` and have to get a `String` instead.
let key = key_item.extract::<String>()?;
dict.insert(&key, value);
// We error on non-string-like keys here.
let key = extract_string_like(&key_item)?;
dict.insert(key, value);
}

Ok(dict.into_value())
Ok(dict.into())
} else if let Ok(string_like) = extract_string_like(value) {
// This allows us to support Python `UUID` objects by serializing them to strings.
Ok(string_like.into())
} else {
unreachable!("failed to convert python data {value} to sonic_rs::Value")
}
Expand Down
1 change: 0 additions & 1 deletion rust/crates/langsmith-tracing-client/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ tokio-util = { workspace = true }
tempfile = { workspace = true }
futures = { workspace = true }
rayon = { workspace = true }
sonic-rs = { workspace = true }
ureq = { workspace = true }
flate2 = { workspace = true }

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,13 @@ use std::time::Instant;
use criterion::{criterion_group, criterion_main, Criterion};
use mockito::Server;
use reqwest::blocking::multipart::{Form, Part};
use sonic_rs::Value;
use serde_json::Value;
use uuid::Uuid;

fn create_json_with_large_array(len: usize) -> Value {
let large_array: Vec<Value> = (0..len)
.map(|i| {
sonic_rs::json!({
serde_json::json!({
"index": i,
"data": format!("This is element number {}", i),
"nested": {
Expand All @@ -23,7 +23,7 @@ fn create_json_with_large_array(len: usize) -> Value {
})
.collect();

sonic_rs::json!({
serde_json::json!({
"name": "Huge JSON",
"description": "This is a very large JSON object for benchmarking purposes.",
"array": large_array,
Expand All @@ -37,7 +37,7 @@ fn create_json_with_large_array(len: usize) -> Value {

fn create_json_with_large_strings(len: usize) -> Value {
let large_string = "a".repeat(len);
sonic_rs::json!({
serde_json::json!({
"name": "Huge JSON",
"description": "This is a very large JSON object for benchmarking purposes.",
"key1": large_string.clone(),
Expand All @@ -55,7 +55,7 @@ fn create_json_with_large_strings(len: usize) -> Value {
fn benchmark_sequential(data: &[Value]) -> Vec<Vec<u8>> {
data.iter()
.map(|json| {
let data = sonic_rs::to_vec(json).expect("Failed to serialize JSON");
let data = serde_json::to_vec(json).expect("Failed to serialize JSON");
// gzip the data using flate2
let mut encoder =
flate2::write::GzEncoder::new(Vec::new(), flate2::Compression::fast());
Expand All @@ -69,7 +69,7 @@ fn benchmark_sequential(data: &[Value]) -> Vec<Vec<u8>> {
fn benchmark_parallel(data: &[Value]) -> Vec<Vec<u8>> {
data.par_iter()
.map(|json| {
let data = sonic_rs::to_vec(json).expect("Failed to serialize JSON");
let data = serde_json::to_vec(json).expect("Failed to serialize JSON");
// gzip the data using flate2
let mut encoder =
flate2::write::GzEncoder::new(Vec::new(), flate2::Compression::fast());
Expand All @@ -93,7 +93,7 @@ fn benchmark_gzip_only_parallel(data: &Vec<Vec<u8>>) -> Vec<Vec<u8>> {

// into par iter
fn benchmark_json_only_parallel(data: &[Value]) -> Vec<Vec<u8>> {
data.par_iter().map(|json| sonic_rs::to_vec(json).expect("Failed to serialize JSON")).collect()
data.par_iter().map(|json| serde_json::to_vec(json).expect("Failed to serialize JSON")).collect()
}

fn json_benchmark_large_array(c: &mut Criterion) {
Expand Down Expand Up @@ -155,7 +155,7 @@ fn hitting_mock_server_benchmark(c: &mut Criterion) {
let bytes: Vec<Part> = data
.par_iter()
.map(|json| {
let data = sonic_rs::to_vec(json).expect("Failed to serialize JSON");
let data = serde_json::to_vec(json).expect("Failed to serialize JSON");
Part::bytes(data)
.file_name("part".to_string())
.mime_str("application/json")
Expand Down Expand Up @@ -191,7 +191,7 @@ fn hitting_mock_server_benchmark(c: &mut Criterion) {

let bytes: Vec<Vec<u8>> = data
.par_iter()
.map(|json| sonic_rs::to_vec(json).expect("Failed to serialize JSON"))
.map(|json| serde_json::to_vec(json).expect("Failed to serialize JSON"))
.collect();

let mut multipart_body = Vec::new();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ use langsmith_tracing_client::client::{
Attachment, EventType, RunCommon, RunCreate, RunCreateExtended, RunEventBytes, RunIO, TimeValue,
};
use mockito::Server;
use sonic_rs::{json, Value};
use serde_json::{json, Value};
use std::time::Duration;
use tokio::runtime::Runtime;

Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use futures::stream::{FuturesUnordered, StreamExt};
use rayon::iter::{IntoParallelIterator, ParallelIterator};
use reqwest::multipart::{Form, Part};
use sonic_rs::to_vec;
use serde_json::to_vec;
use tokio::sync::mpsc::Receiver;
use tokio::task;
use tokio::time::{sleep, Instant};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use std::time::{Duration, Instant};

use rayon::iter::{IntoParallelIterator, ParallelIterator};
use reqwest::blocking::multipart::{Form, Part};
use sonic_rs::to_vec;
use serde_json::to_vec;

use super::tracing_client::ClientConfig;
use crate::client::errors::TracingClientError;
Expand Down
2 changes: 1 addition & 1 deletion rust/crates/langsmith-tracing-client/src/client/run.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use serde::{Deserialize, Serialize};
use sonic_rs::Value;
use serde_json::Value;

// Map attachment ref to tuple of filename, optional bytes
#[derive(Debug)]
Expand Down
2 changes: 1 addition & 1 deletion rust/crates/langsmith-tracing-client/tests/test_run.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use langsmith_tracing_client::client::{RunCommon, RunCreate, RunUpdate, TimeValue};
use sonic_rs::{json, to_string, Value};
use serde_json::{json, to_string, Value};

#[test]
fn test_run_common() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use langsmith_tracing_client::client::{
use mockito::Server;
use multipart::server::Multipart;
use reqwest::header::{HeaderMap, HeaderValue};
use sonic_rs::{from_str, json, to_vec, Value};
use serde_json::{from_str, json, to_vec, Value};
use std::fs::File;
use std::io::{Read, Write};
use std::sync::{Arc, Mutex};
Expand Down

0 comments on commit 3bd8258

Please sign in to comment.