Skip to content

Commit

Permalink
Fix duplicate keys preservation of JSON data (httpie#1163)
Browse files Browse the repository at this point in the history
* Fix duplicate keys preservation of JSON data

* Update issue number

* Fix type annotations

* Changes after review

* Rewording
BoboTiG authored Sep 21, 2021
1 parent e6c5cd3 commit d7ed45b
Showing 8 changed files with 124 additions and 23 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -5,6 +5,7 @@ This project adheres to [Semantic Versioning](https://semver.org/).

## [2.6.0.dev0](https://github.com/httpie/httpie/compare/2.5.0...master) (unreleased)

- Fixed duplicate keys preservation of JSON data. ([#1163](https://github.com/httpie/httpie/issues/1163))
- Added support for formatting & coloring of JSON bodies preceded by non-JSON data (e.g., an XSSI prefix). ([#1130](https://github.com/httpie/httpie/issues/1130))

## [2.5.0](https://github.com/httpie/httpie/compare/2.4.0...2.5.0) (2021-09-06)
4 changes: 2 additions & 2 deletions httpie/cli/requestitems.py
Original file line number Diff line number Diff line change
@@ -15,7 +15,7 @@
RequestQueryParamsDict,
)
from .exceptions import ParseError
from ..utils import get_content_type, load_json_preserve_order
from ..utils import get_content_type, load_json_preserve_order_and_dupe_keys


class RequestItems:
@@ -150,6 +150,6 @@ def load_text_file(item: KeyValueArg) -> str:

def load_json(arg: KeyValueArg, contents: str) -> JSONType:
try:
return load_json_preserve_order(contents)
return load_json_preserve_order_and_dupe_keys(contents)
except ValueError as e:
raise ParseError(f'{arg.orig!r}: {e}')
5 changes: 3 additions & 2 deletions httpie/output/utils.py
Original file line number Diff line number Diff line change
@@ -2,6 +2,7 @@
import re
from typing import Tuple

from ..utils import load_json_preserve_order_and_dupe_keys
from .lexers.json import PREFIX_REGEX


@@ -11,14 +12,14 @@ def load_prefixed_json(data: str) -> Tuple[str, json.JSONDecoder]:
"""
# First, the full data.
try:
return '', json.loads(data)
return '', load_json_preserve_order_and_dupe_keys(data)
except ValueError:
pass

# Then, try to find the start of the actual body.
data_prefix, body = parse_prefixed_json(data)
try:
return data_prefix, json.loads(body)
return data_prefix, load_json_preserve_order_and_dupe_keys(body)
except ValueError:
raise ValueError('Invalid JSON')

58 changes: 54 additions & 4 deletions httpie/utils.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,69 @@
import json
import mimetypes
import re
import sys
import time
from collections import OrderedDict
from http.cookiejar import parse_ns_headers
from pprint import pformat
from typing import List, Optional, Tuple
import re
from typing import Any, List, Optional, Tuple

import requests.auth

RE_COOKIE_SPLIT = re.compile(r', (?=[^ ;]+=)')
# A single decoded JSON object member: `(key, value)`.
Item = Tuple[str, Any]
# All members of a decoded JSON object, in document order, duplicates included.
Items = List[Item]


class JsonDictPreservingDuplicateKeys(OrderedDict):
    """A specialized JSON dict preserving duplicate keys.

    The instance is built from the list of `(key, value)` pairs of a JSON
    object and hands that very list back from `items()`, so serializers
    that iterate over `items()` emit every pair, repeated keys included.
    The mapping itself is *not* populated with the pairs: it only ever
    holds a single placeholder entry (see `_ensure_items_used()`).

    """

    # Python versions prior to 3.8 suffer from an issue with multiple keys with the same name.
    # `json.dumps(obj, indent=N, sort_keys=True)` outputs sorted keys when they are unique, but
    # duplicate keys are output in the order they were defined in the original data.
    # See <https://bugs.python.org/issue23493#msg400929> for the behavior change between Python versions.
    SUPPORTS_SORTING = sys.version_info >= (3, 8)

    def __init__(self, items: Items):
        """Wrap `items`, the `(key, value)` pairs of one JSON object.

        The list is stored as-is (not copied) and returned by `items()`.

        """
        # Initialise the base class explicitly, so that inserting the
        # placeholder below does not depend on `OrderedDict` being usable
        # without its own `__init__()` having run.
        super().__init__()
        self._items = items
        self._ensure_items_used()

    def _ensure_items_used(self) -> None:
        """HACK: Force `json.dumps()` to use `self.items()` instead of an empty dict.

        Two JSON encoders are available on CPython: pure-Python (1) and C (2) implementations.

        (1) The pure-python implementation will do a simple `if not dict: return '{}'`,
        and we could fake that check by implementing the `__bool__()` method.
        Source:
            - <https://github.com/python/cpython/blob/9d318ad/Lib/json/encoder.py#L334-L336>

        (2) On the other hand, the C implementation will do a check on the number of
        items contained inside the dict, using a verification on `dict->ma_used`, which
        is updated only when an item is added/removed from the dict. For that case,
        there is no workaround but to add an item into the dict.
        Sources:
            - <https://github.com/python/cpython/blob/9d318ad/Modules/_json.c#L1581-L1582>
            - <https://github.com/python/cpython/blob/9d318ad/Include/cpython/dictobject.h#L53>
            - <https://github.com/python/cpython/blob/9d318ad/Include/cpython/dictobject.h#L17-L18>

        To please both implementations, we simply add one item to the dict.

        """
        # An empty JSON object must stay empty so it is still rendered as `{}`.
        if self._items:
            self['__hack__'] = '__hack__'

    def items(self) -> Items:
        """Return all items, duplicate ones included."""
        return self._items


def load_json_preserve_order(s):
    """Parse the JSON document `s`, decoding every object into an `OrderedDict`.

    Object members keep the order in which they appear in `s`.

    """
    parsed = json.loads(s, object_pairs_hook=OrderedDict)
    return parsed
def load_json_preserve_order_and_dupe_keys(s):
    """Parse the JSON document `s`, keeping member order and duplicate keys.

    Every JSON object is decoded through `JsonDictPreservingDuplicateKeys`,
    which receives the object's `(key, value)` pairs from `json.loads()`.

    """
    parsed = json.loads(s, object_pairs_hook=JsonDictPreservingDuplicateKeys)
    return parsed


def repr_dict(d: dict) -> str:
1 change: 1 addition & 0 deletions tests/fixtures/__init__.py
Original file line number Diff line number Diff line change
@@ -16,6 +16,7 @@ def patharg(path):
FIXTURES_ROOT = Path(__file__).parent
FILE_PATH = FIXTURES_ROOT / 'test.txt'
JSON_FILE_PATH = FIXTURES_ROOT / 'test.json'
JSON_WITH_DUPE_KEYS_FILE_PATH = FIXTURES_ROOT / 'test_with_dupe_keys.json'
BIN_FILE_PATH = FIXTURES_ROOT / 'test.bin'
XML_FILES_PATH = FIXTURES_ROOT / 'xmldata'
XML_FILES_VALID = list((XML_FILES_PATH / 'valid').glob('*_raw.xml'))
1 change: 1 addition & 0 deletions tests/fixtures/test_with_dupe_keys.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"key":15,"key":15,"key":3,"key":7}
29 changes: 14 additions & 15 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,21 @@
"""CLI argument parsing related tests."""
import argparse
import json

import pytest
from requests.exceptions import InvalidSchema

import httpie.cli.argparser
from .fixtures import (
FILE_CONTENT, FILE_PATH, FILE_PATH_ARG, JSON_FILE_CONTENT,
JSON_FILE_PATH_ARG,
)
from httpie.status import ExitStatus
from httpie.cli import constants
from httpie.cli.definition import parser
from httpie.cli.argtypes import KeyValueArg, KeyValueArgType
from httpie.cli.requestitems import RequestItems
from httpie.status import ExitStatus
from httpie.utils import load_json_preserve_order_and_dupe_keys

from .fixtures import (
FILE_CONTENT, FILE_PATH, FILE_PATH_ARG, JSON_FILE_CONTENT,
JSON_FILE_PATH_ARG,
)
from .utils import HTTP_OK, MockEnvironment, StdinBytesIO, http


@@ -97,17 +98,15 @@ def test_valid_items(self):

# Parsed data
raw_json_embed = items.data.pop('raw-json-embed')
assert raw_json_embed == json.loads(JSON_FILE_CONTENT)
assert raw_json_embed == load_json_preserve_order_and_dupe_keys(JSON_FILE_CONTENT)
items.data['string-embed'] = items.data['string-embed'].strip()
assert dict(items.data) == {
"ed": "",
"string": "value",
"bool": True,
"list": ["a", 1, {}, False],
"obj": {
"a": "b"
},
"string-embed": FILE_CONTENT,
'ed': '',
'string': 'value',
'bool': True,
'list': ['a', 1, {}, False],
'obj': load_json_preserve_order_and_dupe_keys('{"a": "b"}'),
'string-embed': FILE_CONTENT,
}

# Parsed query string parameters
48 changes: 48 additions & 0 deletions tests/test_json.py
Original file line number Diff line number Diff line change
@@ -6,13 +6,29 @@
from httpie.cli.constants import PRETTY_MAP
from httpie.compat import is_windows
from httpie.output.formatters.colors import ColorFormatter
from httpie.utils import JsonDictPreservingDuplicateKeys

from .fixtures import JSON_WITH_DUPE_KEYS_FILE_PATH
from .utils import MockEnvironment, http, URL_EXAMPLE

TEST_JSON_XXSI_PREFIXES = (r")]}',\n", ")]}',", 'while(1);', 'for(;;)', ')', ']', '}')
TEST_JSON_VALUES = ({}, {'a': 0, 'b': 0}, [], ['a', 'b'], 'foo', True, False, None) # FIX: missing int & float
TEST_PREFIX_TOKEN_COLOR = '\x1b[38;5;15m' if is_windows else '\x1b[04m\x1b[91m'

JSON_WITH_DUPES_RAW = '{"key": 15, "key": 15, "key": 3, "key": 7}'
JSON_WITH_DUPES_FORMATTED_SORTED = '''{
"key": 3,
"key": 7,
"key": 15,
"key": 15
}'''
JSON_WITH_DUPES_FORMATTED_UNSORTED = '''{
"key": 15,
"key": 15,
"key": 3,
"key": 7
}'''


@pytest.mark.parametrize('data_prefix', TEST_JSON_XXSI_PREFIXES)
@pytest.mark.parametrize('json_data', TEST_JSON_VALUES)
@@ -38,3 +54,35 @@ def test_json_formatter_with_body_preceded_by_non_json_data(data_prefix, json_da
# meaning it was correctly handled as a whole.
assert TEST_PREFIX_TOKEN_COLOR + data_prefix in expected_body, expected_body
assert expected_body in r


@responses.activate
def test_duplicate_keys_support_from_response():
    """Duplicate keys in a JSON response body must survive formatting."""
    responses.add(
        responses.GET,
        URL_EXAMPLE,
        body=JSON_WITH_DUPES_RAW,
        content_type='application/json',
    )
    base_args = ('--pretty', 'format', URL_EXAMPLE)

    # Sorting is the default; only verify it where the interpreter
    # is able to sort duplicate keys.
    if JsonDictPreservingDuplicateKeys.SUPPORTS_SORTING:
        sorted_output = http(*base_args)
        assert JSON_WITH_DUPES_FORMATTED_SORTED in sorted_output

    # With --unsorted, the pairs keep the order of the original body.
    unsorted_output = http(*base_args, '--unsorted')
    assert JSON_WITH_DUPES_FORMATTED_UNSORTED in unsorted_output


def test_duplicate_keys_support_from_input_file():
    """Duplicate keys in a JSON request file must survive formatting."""
    base_args = (
        '--verbose',
        '--offline',
        URL_EXAMPLE,
        f'@{JSON_WITH_DUPE_KEYS_FILE_PATH}',
    )

    # Sorting is the default; only verify it where the interpreter
    # is able to sort duplicate keys.
    if JsonDictPreservingDuplicateKeys.SUPPORTS_SORTING:
        sorted_output = http(*base_args)
        assert JSON_WITH_DUPES_FORMATTED_SORTED in sorted_output

    # With --unsorted, the pairs keep the order of the input file.
    unsorted_output = http(*base_args, '--unsorted')
    assert JSON_WITH_DUPES_FORMATTED_UNSORTED in unsorted_output

0 comments on commit d7ed45b

Please sign in to comment.