From 0088b035189bed76c4da67ee61adcf819ac69fc9 Mon Sep 17 00:00:00 2001 From: Eugene Yurtsev Date: Sun, 7 May 2023 15:24:06 -0400 Subject: [PATCH] Set default of JSON encoder to ensure_ascii = False (#154) * Update default for ensure_ascii in JSON encoder * Update doc-strings --- kor/encoders/json_data.py | 34 ++++++++++++++-------------------- 1 file changed, 14 insertions(+), 20 deletions(-) diff --git a/kor/encoders/json_data.py b/kor/encoders/json_data.py index 5dbfdba..343c374 100644 --- a/kor/encoders/json_data.py +++ b/kor/encoders/json_data.py @@ -25,34 +25,28 @@ class JSONEncoder(Encoder): from kor import JSONEncoder json_encoder = JSONEncoder(use_tags=True) - json_encoder.encode({"object": [{"a": 1}]}) - # '{"object": [{"a": 1}]}' - - json_encoder = JSONEncoder(use_tags=True, ensure_ascii=False) data = {"name": "Café"} json_encoder.encode(data) # '{"name": "Café"}' + json_encoder = JSONEncoder(use_tags=True, ensure_ascii=True) + data = {"name": "Café"} + json_encoder.encode(data) + # '{"name": "Caf\\u00e9"}' + """ - def __init__(self, use_tags: bool = True, ensure_ascii: bool = True) -> None: + def __init__(self, use_tags: bool = True, ensure_ascii: bool = False) -> None: """Initialize the JSON encoder. - Args: - use_tags: Whether to wrap the output in a special JSON tags. - This may help identify the JSON content in cases when - the model attempts to add clarifying explanations. - ensure_ascii: Whether to escape non-ASCII characters. - data = {"name": "Café"} - - # Using ensure_ascii=True (default) - json_str = json.dumps(data) - print(json_str) # {"name": "Caf\u00e9"} - - # Using ensure_ascii=False - json_str = json.dumps(data, ensure_ascii=False) - print(json_str) # {"name": "Café"} - + Args: + use_tags: Whether to wrap the output in a special JSON tags. + This may help identify the JSON content in cases when + the model attempts to add clarifying explanations. + ensure_ascii: Whether to escape non-ASCII characters. 
+ Default is False to preserve non-ASCII characters as + that is a more sensible behavior for the extraction + use cases. """ self.use_tags = use_tags self.ensure_ascii = ensure_ascii